diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 000000000..243379b0b --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,18 @@ +skips: +- B101 +- B105 +- B301 +- B303 +- B306 +- B307 +- B311 +- B320 +- B321 +- B402 # https://github.com/scrapy/scrapy/issues/4180 +- B403 +- B404 +- B406 +- B410 +- B503 +- B603 +- B605 diff --git a/.bumpversion.cfg b/.bumpversion.cfg index d373d676a..3c1c8f891 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,8 +1,7 @@ [bumpversion] -current_version = 0.25.1 +current_version = 2.3.0 commit = True tag = True tag_name = {new_version} [bumpversion:file:scrapy/VERSION] - diff --git a/.coveragerc b/.coveragerc index 3baaf659a..02acbff8e 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,3 +1,5 @@ [run] +branch = true include = scrapy/* -omit = scrapy/xlib*,scrapy/tests* +omit = + tests/* diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..dfbdf4208 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/sample_data/** binary diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..8ca10109b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,41 @@ +--- +name: Bug report +about: Report a problem to help us improve +--- + + + +### Description + +[Description of the issue] + +### Steps to Reproduce + +1. [First Step] +2. [Second Step] +3. [and so on...] + +**Expected behavior:** [What you expect to happen] + +**Actual behavior:** [What actually happens] + +**Reproduces how often:** [What percentage of the time does it reproduce?] + +### Versions + +Please paste here the output of executing `scrapy version --verbose` in the command line. + +### Additional context + +Any additional information, configuration, data or output from commands that might be necessary to reproduce or understand the issue. Please try not to include screenshots of code or the command line, paste the contents as text instead. You can use [GitHub Flavored Markdown](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make the text look better. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..e05273fe2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,33 @@ +--- +name: Feature request +about: Suggest an idea for an enhancement or new feature +--- + + + +## Summary + +One paragraph explanation of the feature. + +## Motivation + +Why are we doing this? What use cases does it support? What is the expected outcome? + +## Describe alternatives you've considered + +A clear and concise description of the alternative solutions you've considered. Be sure to explain why Scrapy's existing customizability isn't suitable for this feature. + +## Additional context + +Any additional information about the feature request here. 
diff --git a/.gitignore b/.gitignore index 4eb80012f..83a2569dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +/.vagrant +/scrapy.iml *.pyc _trial_temp* dropin.cache @@ -8,3 +10,12 @@ venv build dist .idea +htmlcov/ +.coverage +.pytest_cache/ +.coverage.* +.cache/ +.mypy_cache/ + +# Windows +Thumbs.db diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000..e4d3f02cc --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,12 @@ +version: 2 +formats: all +sphinx: + configuration: docs/conf.py + fail_on_warning: true +python: + # For available versions, see: + # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-image + version: 3.7 # Keep in sync with .travis.yml + install: + - requirements: docs/requirements.txt + - path: . diff --git a/.travis-workarounds.sh b/.travis-workarounds.sh deleted file mode 100755 index 5c34e54f7..000000000 --- a/.travis-workarounds.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -set -e -set -x - -if [[ "${TOXENV}" == "pypy" ]]; then - sudo add-apt-repository -y ppa:pypy/ppa - sudo apt-get -qy update - sudo apt-get install -y pypy pypy-dev - # This is required because we need to get rid of the Travis installed PyPy - # or it'll take precedence over the PPA installed one. - sudo rm -rf /usr/local/pypy/bin -fi - -# Workaround travis-ci/travis-ci#2065 -pip install -U wheel diff --git a/.travis.yml b/.travis.yml index b30d13bed..33a920bb6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,19 +1,74 @@ language: python -python: 2.7 -env: -- TOXENV=py27 -- TOXENV=precise -- TOXENV=py33 +dist: xenial +branches: + only: + - master + - /^\d\.\d+$/ + - /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/ +matrix: + include: + - env: TOXENV=security + python: 3.8 + - env: TOXENV=flake8 + python: 3.8 + - env: TOXENV=pylint + python: 3.8 + - env: TOXENV=docs + python: 3.7 # Keep in sync with .readthedocs.yml + - env: TOXENV=typing + python: 3.8 + + - env: TOXENV=pinned + python: 3.5.2 + - env: TOXENV=asyncio-pinned + python: 3.5.2 # We use additional code to support 3.5.3 and earlier + - env: TOXENV=pypy3-pinned PYPY_VERSION=3-v5.9.0 + + - env: TOXENV=py + python: 3.5 + - env: TOXENV=asyncio + python: 3.5 # We use specific code to support >= 3.5.4, < 3.6 + - env: TOXENV=pypy3 PYPY_VERSION=3.5-v7.0.0 + + - env: TOXENV=py + python: 3.6 + - env: TOXENV=pypy3 PYPY_VERSION=3.6-v7.3.1 + + - env: TOXENV=py + python: 3.7 + + - env: TOXENV=py PYPI_RELEASE_JOB=true + python: 3.8 + dist: bionic + - env: TOXENV=extra-deps + python: 3.8 + dist: bionic + - env: TOXENV=asyncio + python: 3.8 + dist: bionic install: -- "./.travis-workarounds.sh" -- pip install -U tox + - | + if [[ ! 
-z "$PYPY_VERSION" ]]; then + export PYPY_VERSION="pypy$PYPY_VERSION-linux64" + wget "https://downloads.python.org/pypy/${PYPY_VERSION}.tar.bz2" + tar -jxf ${PYPY_VERSION}.tar.bz2 + virtualenv --python="$PYPY_VERSION/bin/pypy3" "$HOME/virtualenvs/$PYPY_VERSION" + source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate" + fi + - pip install -U tox twine wheel codecov + script: tox +after_success: + - codecov notifications: irc: use_notice: true skip_join: true channels: - irc.freenode.org#scrapy +cache: + directories: + - $HOME/.cache/pip deploy: provider: pypi distributions: "sdist bdist_wheel" @@ -22,6 +77,5 @@ deploy: secure: JaAKcy1AXWXDK3LXdjOtKyaVPCSFoCGCnW15g4f65E/8Fsi9ZzDfmBa4Equs3IQb/vs/if2SVrzJSr7arN7r9Z38Iv1mUXHkFAyA3Ym8mThfABBzzcUWEQhIHrCX0Tdlx9wQkkhs+PZhorlmRS4gg5s6DzPaeA2g8SCgmlRmFfA= on: tags: true - all_branches: true repo: scrapy/scrapy - condition: "$TOXENV == py27 && $TRAVIS_TAG =~ ^[0-9][.][0-9]*[02468][.]" + condition: "$PYPI_RELEASE_JOB == true && $TRAVIS_TAG =~ ^[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$" diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..d1cd3e517 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,74 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at opensource@scrapinghub.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at [http://contributor-covenant.org/version/1/4][version]. + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6624b43b6..a05d07aee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,2 +1,6 @@ The guidelines for contributing are available here: -http://doc.scrapy.org/en/latest/contributing.html +https://docs.scrapy.org/en/master/contributing.html + +Please do not abuse the issue tracker for support questions. +If your issue topic can be rephrased to "How to ...?", please use the +support channels to get it answered: https://scrapy.org/community/ diff --git a/INSTALL b/INSTALL index 84803a933..06e812936 100644 --- a/INSTALL +++ b/INSTALL @@ -1,4 +1,4 @@ For information about installing Scrapy see: * docs/intro/install.rst (local file) -* http://doc.scrapy.org/en/latest/intro/install.html (online version) +* https://docs.scrapy.org/en/latest/intro/install.html (online version) diff --git a/LICENSE b/LICENSE index 68ccf9762..4d0a0863a 100644 --- a/LICENSE +++ b/LICENSE @@ -4,11 +4,11 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of Scrapy nor the names of its contributors may be used diff --git a/MANIFEST.in b/MANIFEST.in index 0561cc74c..ae7db51fa 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,11 +3,24 @@ include AUTHORS include INSTALL include LICENSE include MANIFEST.in +include NEWS + include scrapy/VERSION include scrapy/mime.types + +include codecov.yml +include conftest.py +include pytest.ini +include requirements-*.txt +include tox.ini + recursive-include scrapy/templates * recursive-include scrapy license.txt recursive-include docs * prune docs/build + recursive-include extras * recursive-include bin * +recursive-include tests * + +global-exclude __pycache__ *.py[cod] diff --git a/Makefile.buildbot b/Makefile.buildbot deleted file mode 100644 index 68c8bdc54..000000000 --- a/Makefile.buildbot +++ /dev/null @@ -1,21 +0,0 @@ -TRIAL := $(shell which trial) -BRANCH := $(shell git rev-parse --abbrev-ref HEAD) -export PYTHONPATH=$(PWD) - -test: - coverage run --branch $(TRIAL) --reporter=text tests - rm -rf htmlcov && coverage html - -s3cmd sync -P htmlcov/ s3://static.scrapy.org/coverage-scrapy-$(BRANCH)/ - -build: - test $(BRANCH) != master || git describe >scrapy/VERSION - python extras/makedeb.py build - -clean: - git checkout debian scrapy/VERSION - git clean -dfq - -pypi: - umask 0022 && chmod -R a+rX . && python setup.py sdist upload - -.PHONY: clean test build diff --git a/README.rst b/README.rst index 6020a3670..0e3939e9b 100644 --- a/README.rst +++ b/README.rst @@ -2,31 +2,46 @@ Scrapy ====== -.. image:: https://badge.fury.io/py/Scrapy.png - :target: http://badge.fury.io/py/Scrapy +.. image:: https://img.shields.io/pypi/v/Scrapy.svg + :target: https://pypi.python.org/pypi/Scrapy + :alt: PyPI Version -.. image:: https://secure.travis-ci.org/scrapy/scrapy.png?branch=master - :target: http://travis-ci.org/scrapy/scrapy +.. image:: https://img.shields.io/pypi/pyversions/Scrapy.svg + :target: https://pypi.python.org/pypi/Scrapy + :alt: Supported Python Versions + +.. image:: https://img.shields.io/travis/scrapy/scrapy/master.svg + :target: https://travis-ci.org/scrapy/scrapy + :alt: Build Status + +.. image:: https://img.shields.io/badge/wheel-yes-brightgreen.svg + :target: https://pypi.python.org/pypi/Scrapy + :alt: Wheel Status + +.. image:: https://img.shields.io/codecov/c/github/scrapy/scrapy/master.svg + :target: https://codecov.io/github/scrapy/scrapy?branch=master + :alt: Coverage report + +.. image:: https://anaconda.org/conda-forge/scrapy/badges/version.svg + :target: https://anaconda.org/conda-forge/scrapy + :alt: Conda Version -.. image:: https://pypip.in/wheel/Scrapy/badge.png - :target: https://pypi.python.org/pypi/Scrapy/ - :alt: Wheel Status Overview ======== -Scrapy is a fast high-level screen scraping and web crawling framework, used to +Scrapy is a fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing. -For more information including a list of features check the Scrapy homepage at: -http://scrapy.org +Check the Scrapy homepage at https://scrapy.org for more information, +including a list of features. 
Requirements ============ -* Python 2.7 -* Works on Linux, Windows, Mac OSX, BSD +* Python 3.5.2+ +* Works on Linux, Windows, macOS, BSD Install ======= @@ -35,37 +50,45 @@ The quick way:: pip install scrapy -For more details see the install section in the documentation: -http://doc.scrapy.org/en/latest/intro/install.html - -Releases -======== - -You can download the latest stable and development releases from: -http://scrapy.org/download/ +See the install section in the documentation at +https://docs.scrapy.org/en/latest/intro/install.html for more details. Documentation ============= -Documentation is available online at http://doc.scrapy.org/ and in the ``docs`` +Documentation is available online at https://docs.scrapy.org/ and in the ``docs`` directory. +Releases +======== + +You can check https://docs.scrapy.org/en/latest/news.html for the release notes. + Community (blog, twitter, mail list, IRC) ========================================= -See http://scrapy.org/community/ +See https://scrapy.org/community/ for details. Contributing ============ -See http://doc.scrapy.org/en/latest/contributing.html +See https://docs.scrapy.org/en/master/contributing.html for details. + +Code of Conduct +--------------- + +Please note that this project is released with a Contributor Code of Conduct +(see https://github.com/scrapy/scrapy/blob/master/CODE_OF_CONDUCT.md). + +By participating in this project you agree to abide by its terms. +Please report unacceptable behavior to opensource@scrapinghub.com. Companies using Scrapy ====================== -See http://scrapy.org/companies/ +See https://scrapy.org/companies/ for a list. Commercial Support ================== -See http://scrapy.org/support/ +See https://scrapy.org/support/ for details. diff --git a/artwork/README b/artwork/README.rst similarity index 73% rename from artwork/README rename to artwork/README.rst index c185d57da..8a1028cde 100644 --- a/artwork/README +++ b/artwork/README.rst @@ -1,3 +1,4 @@ +============== Scrapy artwork ============== @@ -8,10 +9,10 @@ scrapy-logo.jpg Main Scrapy logo, in JPEG format. -qlassik.zip +qlassik.zip ----------- -Font used for Scrapy logo. Homepage: http://www.dafont.com/qlassik.font +Font used for Scrapy logo. 
Homepage: https://www.dafont.com/qlassik.font scrapy-blog.logo.xcf -------------------- diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 000000000..710e42090 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,24 @@ +variables: + TOXENV: py +pool: + vmImage: 'windows-latest' +strategy: + matrix: + Python35: + python.version: '3.5' + TOXENV: windows-pinned + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + Python38: + python.version: '3.8' +steps: +- task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + displayName: 'Use Python $(python.version)' +- script: | + pip install -U tox twine wheel codecov + tox + displayName: 'Run test suite' diff --git a/bin/scrapy b/bin/scrapy deleted file mode 100755 index 918ea7fbd..000000000 --- a/bin/scrapy +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python - -from scrapy.cmdline import execute -execute() diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 000000000..d8aa6b984 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,6 @@ +comment: + layout: "header, diff, tree" + +coverage: + status: + project: false diff --git a/conftest.py b/conftest.py index 9f9a5bca7..b39d644a5 100644 --- a/conftest.py +++ b/conftest.py @@ -1,49 +1,55 @@ -import six +from pathlib import Path + import pytest -from twisted.python import log - -from scrapy import optional_features - -collect_ignore = ["scrapy/stats.py"] -if 'django' not in optional_features: - collect_ignore.append("tests/test_djangoitem/models.py") - -if six.PY3: - for fn in open('tests/py3-ignores.txt'): - if fn.strip(): - collect_ignore.append(fn.strip()) - -class LogObservers: - """Class for keeping track of log observers across test modules""" - - def __init__(self): - self.observers = [] - - def add(self, logfile='test.log'): - fileobj = open(logfile, 'wb') - observer = log.FileLogObserver(fileobj) - log.startLoggingWithObserver(observer.emit, 0) - self.observers.append((fileobj, observer)) - - def remove(self): - fileobj, observer = self.observers.pop() - log.removeObserver(observer.emit) - fileobj.close() -@pytest.fixture(scope='module') -def log_observers(): - return LogObservers() +def _py_files(folder): + return (str(p) for p in Path(folder).rglob('*.py')) -@pytest.fixture() -def setlog(request, log_observers): - """Attach test.log file observer to twisted log, for trial compatibility""" - log_observers.add() - request.addfinalizer(log_observers.remove) +collect_ignore = [ + # not a test, but looks like a test + "scrapy/utils/testsite.py", + # contains scripts to be run by tests/test_crawler.py::CrawlerProcessSubprocess + *_py_files("tests/CrawlerProcess"), + # contains scripts to be run by tests/test_crawler.py::CrawlerRunnerSubprocess + *_py_files("tests/CrawlerRunner"), + # Py36-only parts of respective tests + *_py_files("tests/py36"), +] + +for line in open('tests/ignores.txt'): + file_path = line.strip() + if file_path and file_path[0] != '#': + collect_ignore.append(file_path) @pytest.fixture() def chdir(tmpdir): """Change to pytest-provided temporary directory""" tmpdir.chdir() + + +def pytest_collection_modifyitems(session, config, items): + # Avoid executing tests when executing `--flake8` flag (pytest-flake8) + try: + from pytest_flake8 import Flake8Item + if config.getoption('--flake8'): + items[:] = [item for item in items if isinstance(item, Flake8Item)] + except ImportError: + pass + + +@pytest.fixture(scope='class') +def reactor_pytest(request): + if not request.cls: + # doctests + return + 
request.cls.reactor_pytest = request.config.getoption("--reactor") + return request.cls.reactor_pytest + + +@pytest.fixture(autouse=True) +def only_asyncio(request, reactor_pytest): + if request.node.get_closest_marker('only_asyncio') and reactor_pytest != 'asyncio': + pytest.skip('This test is only run with --reactor=asyncio') diff --git a/debian/changelog b/debian/changelog deleted file mode 100644 index f4f5b9d9c..000000000 --- a/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -scrapy-SUFFIX (0.11) unstable; urgency=low - - * Initial release. - - -- Scrapinghub Team Thu, 10 Jun 2010 17:24:02 -0300 diff --git a/debian/compat b/debian/compat deleted file mode 100644 index 7f8f011eb..000000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/debian/control b/debian/control deleted file mode 100644 index 4be62895f..000000000 --- a/debian/control +++ /dev/null @@ -1,20 +0,0 @@ -Source: scrapy-SUFFIX -Section: python -Priority: optional -Maintainer: Scrapinghub Team -Build-Depends: debhelper (>= 7.0.50), python (>=2.7), python-twisted, python-w3lib, python-lxml, python-six (>=1.5.2) -Standards-Version: 3.8.4 -Homepage: http://scrapy.org/ - -Package: scrapy-SUFFIX -Architecture: all -Depends: ${python:Depends}, python-lxml, python-twisted, python-openssl, - python-w3lib (>= 1.8.0), python-queuelib, python-cssselect (>= 0.9), python-six (>=1.5.2) -Recommends: python-setuptools -Conflicts: python-scrapy, scrapy, scrapy-0.11 -Provides: python-scrapy, scrapy -Description: Python web crawling and scraping framework - Scrapy is a fast high-level screen scraping and web crawling framework, - used to crawl websites and extract structured data from their pages. - It can be used for a wide range of purposes, from data mining to - monitoring and automated testing. diff --git a/debian/copyright b/debian/copyright deleted file mode 100644 index 4cc239002..000000000 --- a/debian/copyright +++ /dev/null @@ -1,40 +0,0 @@ -This package was debianized by the Scrapinghub team . - -It was downloaded from http://scrapy.org - -Upstream Author: Scrapy Developers - -Copyright: 2007-2013 Scrapy Developers - -License: bsd - -Copyright (c) Scrapy developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of Scrapy nor the names of its contributors may be used - to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -The Debian packaging is (C) 2010-2013, Scrapinghub and -is licensed under the BSD, see `/usr/share/common-licenses/BSD'. diff --git a/debian/pyversions b/debian/pyversions deleted file mode 100644 index 1effb0034..000000000 --- a/debian/pyversions +++ /dev/null @@ -1 +0,0 @@ -2.7 diff --git a/debian/rules b/debian/rules deleted file mode 100755 index b8796e6e3..000000000 --- a/debian/rules +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/make -f -# -*- makefile -*- - -%: - dh $@ diff --git a/debian/scrapy.docs b/debian/scrapy.docs deleted file mode 100644 index c19ffba4d..000000000 --- a/debian/scrapy.docs +++ /dev/null @@ -1,2 +0,0 @@ -README.rst -AUTHORS diff --git a/debian/scrapy.install b/debian/scrapy.install deleted file mode 100644 index 5977d5f43..000000000 --- a/debian/scrapy.install +++ /dev/null @@ -1 +0,0 @@ -extras/scrapy_bash_completion etc/bash_completion.d/ diff --git a/debian/scrapy.lintian-overrides b/debian/scrapy.lintian-overrides deleted file mode 100644 index 955e7def0..000000000 --- a/debian/scrapy.lintian-overrides +++ /dev/null @@ -1,2 +0,0 @@ -new-package-should-close-itp-bug -extra-license-file usr/share/pyshared/scrapy/xlib/pydispatch/license.txt diff --git a/debian/scrapy.manpages b/debian/scrapy.manpages deleted file mode 100644 index 4818e9c92..000000000 --- a/debian/scrapy.manpages +++ /dev/null @@ -1 +0,0 @@ -extras/scrapy.1 diff --git a/docs/Makefile b/docs/Makefile index c6e4dd64d..ff68bf1ae 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -8,8 +8,10 @@ PYTHON = python SPHINXOPTS = PAPER = SOURCES = +SHELL = /bin/bash -ALLSPHINXOPTS = -b $(BUILDER) -d build/doctrees -D latex_paper_size=$(PAPER) \ +ALLSPHINXOPTS = -b $(BUILDER) -d build/doctrees \ + -D latex_elements.papersize=$(PAPER) \ $(SPHINXOPTS) . 
build/$(BUILDER) $(SOURCES) .PHONY: help update build html htmlhelp clean @@ -22,13 +24,19 @@ help: @echo " text to make plain text files" @echo " changes to make an overview over all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" + @echo " watch build HTML docs, open in browser and watch for changes" - -build: +build-dirs: mkdir -p build/$(BUILDER) build/doctrees + +build: build-dirs sphinx-build $(ALLSPHINXOPTS) @echo +build-ignore-errors: build-dirs + -sphinx-build $(ALLSPHINXOPTS) + @echo + html: BUILDER = html html: build @@ -58,6 +66,12 @@ linkcheck: build @echo "Link check complete; look for any errors in the above output " \ "or in build/$(BUILDER)/output.txt" +linkfix: BUILDER = linkcheck +linkfix: build-ignore-errors + $(PYTHON) utils/linkfix.py + @echo "Fixing redirecting links in docs has finished; check all " \ + "replacements before committing them" + doctest: BUILDER = doctest doctest: build @echo "Testing of doctests in the sources finished, look at the " \ @@ -68,9 +82,15 @@ pydoc-topics: build @echo "Building finished; now copy build/pydoc-topics/pydoc_topics.py " \ "into the Lib/ directory" +coverage: BUILDER = coverage +coverage: build + htmlview: html - $(PYTHON) -c "import webbrowser; webbrowser.open('build/html/index.html')" + $(PYTHON) -c "import webbrowser, os; webbrowser.open('file://' + \ + os.path.realpath('build/html/index.html'))" clean: -rm -rf build/* +watch: htmlview + watchmedo shell-command -p '*.rst' -c 'make html' -R -D diff --git a/docs/README b/docs/README.rst similarity index 58% rename from docs/README rename to docs/README.rst index 7fd549374..0b7afa548 100644 --- a/docs/README +++ b/docs/README.rst @@ -1,3 +1,5 @@ +:orphan: + ====================================== Scrapy documentation quick start guide ====================================== @@ -8,16 +10,12 @@ This file provides a quick guide on how to compile the Scrapy documentation. Setup the environment --------------------- -To compile the documentation you need the following Python libraries: +To compile the documentation you need Sphinx Python library. To install it +and all its dependencies run the following command from this dir - * Sphinx - * docutils - * jinja +:: -If you have setuptools available the following command will install all of them -(since Sphinx requires both docutils and jinja):: - - easy_install Sphinx + pip install -r requirements.txt Compile the documentation @@ -52,3 +50,19 @@ To cleanup all generated documentation files and start from scratch run:: Keep in mind that this command won't touch any documentation source files. +Recreating documentation on the fly +----------------------------------- + +There is a way to recreate the doc automatically when you make changes, you +need to install watchdog (``pip install watchdog``) and then use:: + + make watch + +Alternative method using tox +---------------------------- + +To compile the documentation to HTML run the following command:: + + tox -e docs + +Documentation will be generated (in HTML format) inside the ``.tox/docs/tmp/html`` dir. 
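Taken together, the docs/Makefile and docs/README.rst changes above describe the documentation build workflow. A minimal sketch of the equivalent commands, run from the repository root (it assumes docs/requirements.txt pulls in Sphinx and its dependencies, and that watchdog is installed separately for the watch target, as the README notes)::

    pip install -r docs/requirements.txt   # Sphinx and the other docs dependencies
    make -C docs html                      # one-off build into docs/build/html
    make -C docs watch                     # build, open in a browser, rebuild on *.rst changes (needs watchdog)
    tox -e docs                            # the same build inside an isolated tox environment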
diff --git a/docs/_ext/scrapydocs.py b/docs/_ext/scrapydocs.py index 1fa1c93d6..640660943 100644 --- a/docs/_ext/scrapydocs.py +++ b/docs/_ext/scrapydocs.py @@ -1,5 +1,82 @@ from docutils.parsers.rst.roles import set_classes from docutils import nodes +from docutils.parsers.rst import Directive +from sphinx.util.nodes import make_refnode +from operator import itemgetter + + +class settingslist_node(nodes.General, nodes.Element): + pass + + +class SettingsListDirective(Directive): + def run(self): + return [settingslist_node('')] + + +def is_setting_index(node): + if node.tagname == 'index': + # index entries for setting directives look like: + # [('pair', 'SETTING_NAME; setting', 'std:setting-SETTING_NAME', '')] + entry_type, info, refid = node['entries'][0][:3] + return entry_type == 'pair' and info.endswith('; setting') + return False + + +def get_setting_target(node): + # target nodes are placed next to the node in the doc tree + return node.parent[node.parent.index(node) + 1] + + +def get_setting_name_and_refid(node): + """Extract setting name from directive index node""" + entry_type, info, refid = node['entries'][0][:3] + return info.replace('; setting', ''), refid + + +def collect_scrapy_settings_refs(app, doctree): + env = app.builder.env + + if not hasattr(env, 'scrapy_all_settings'): + env.scrapy_all_settings = [] + + for node in doctree.traverse(is_setting_index): + targetnode = get_setting_target(node) + assert isinstance(targetnode, nodes.target), "Next node is not a target" + + setting_name, refid = get_setting_name_and_refid(node) + + env.scrapy_all_settings.append({ + 'docname': env.docname, + 'setting_name': setting_name, + 'refid': refid, + }) + + +def make_setting_element(setting_data, app, fromdocname): + refnode = make_refnode(app.builder, fromdocname, + todocname=setting_data['docname'], + targetid=setting_data['refid'], + child=nodes.Text(setting_data['setting_name'])) + p = nodes.paragraph() + p += refnode + + item = nodes.list_item() + item += p + return item + + +def replace_settingslist_nodes(app, doctree, fromdocname): + env = app.builder.env + + for node in doctree.traverse(settingslist_node): + settings_list = nodes.bullet_list() + settings_list.extend([make_setting_element(d, app, fromdocname) + for d in sorted(env.scrapy_all_settings, + key=itemgetter('setting_name')) + if fromdocname != d['docname']]) + node.replace_self(settings_list) + def setup(app): app.add_crossref_type( @@ -27,24 +104,34 @@ def setup(app): app.add_role('issue', issue_role) app.add_role('rev', rev_role) + app.add_node(settingslist_node) + app.add_directive('settingslist', SettingsListDirective) + + app.connect('doctree-read', collect_scrapy_settings_refs) + app.connect('doctree-resolved', replace_settingslist_nodes) + + def source_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'https://github.com/scrapy/scrapy/blob/master/' + text set_classes(options) node = nodes.reference(rawtext, text, refuri=ref, **options) return [node], [] + def issue_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'https://github.com/scrapy/scrapy/issues/' + text set_classes(options) node = nodes.reference(rawtext, 'issue ' + text, refuri=ref, **options) return [node], [] + def commit_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'https://github.com/scrapy/scrapy/commit/' + text set_classes(options) node = nodes.reference(rawtext, 'commit ' + text, refuri=ref, **options) return [node], [] + def rev_role(name, rawtext, text, lineno, 
inliner, options={}, content=[]): ref = 'http://hg.scrapy.org/scrapy/changeset/' + text set_classes(options) diff --git a/docs/_static/scrapydoc.css b/docs/_static/scrapydoc.css deleted file mode 100644 index 3e58a5e70..000000000 --- a/docs/_static/scrapydoc.css +++ /dev/null @@ -1,657 +0,0 @@ -/** - * Sphinx Doc Design - */ - -body { - font-family: sans-serif; - font-size: 100%; - background-color: #3d1e11; - color: #000; - margin: 0; - padding: 0; -} - -/* :::: LAYOUT :::: */ - -div.document { - background-color: #69341e; -} - -div.documentwrapper { - float: left; - width: 100%; -} - -div.bodywrapper { - margin: 0 0 0 230px; -} - -div.body { - background-color: white; - padding: 0 20px 30px 20px; -} - -div.sphinxsidebarwrapper { - padding: 10px 5px 0 10px; -} - -div.sphinxsidebar { - float: left; - width: 230px; - margin-left: -100%; - font-size: 90%; -} - -div.clearer { - clear: both; -} - -div.footer { - color: #fff; - width: 100%; - padding: 9px 0 9px 0; - text-align: center; - font-size: 75%; -} - -div.footer a { - color: #fff; - text-decoration: underline; -} - -div.related { - background-color: #5b1616; - color: #fff; - width: 100%; - line-height: 30px; - font-size: 90%; -} - -div.related h3 { - display: none; -} - -div.related ul { - margin: 0; - padding: 0 0 0 10px; - list-style: none; -} - -div.related li { - display: inline; -} - -div.related li.right { - float: right; - margin-right: 5px; -} - -div.related a { - color: white; -} - -/* ::: TOC :::: */ -div.sphinxsidebar h3 { - font-family: 'Trebuchet MS', sans-serif; - color: white; - font-size: 1.4em; - font-weight: normal; - margin: 0; - padding: 0; -} - -div.sphinxsidebar h3 a { - color: white; -} - -div.sphinxsidebar h4 { - font-family: 'Trebuchet MS', sans-serif; - color: white; - font-size: 1.3em; - font-weight: normal; - margin: 5px 0 0 0; - padding: 0; -} - -div.sphinxsidebar p { - color: white; -} - -div.sphinxsidebar p.topless { - margin: 5px 10px 10px 10px; -} - -div.sphinxsidebar ul { - margin: 10px; - padding: 0; - list-style: none; - color: white; -} - -div.sphinxsidebar ul ul, -div.sphinxsidebar ul.want-points { - margin-left: 20px; - list-style: square; -} - -div.sphinxsidebar ul ul { - margin-top: 0; - margin-bottom: 0; -} - -div.sphinxsidebar a { - color: #ffca9b; -} - -div.sphinxsidebar form { - margin-top: 10px; -} - -div.sphinxsidebar input { - border: 1px solid #ffca9b; - font-family: sans-serif; - font-size: 1em; -} - -/* :::: MODULE CLOUD :::: */ -div.modulecloud { - margin: -5px 10px 5px 10px; - padding: 10px; - line-height: 160%; - border: 1px solid #cbe7e5; - background-color: #f2fbfd; -} - -div.modulecloud a { - padding: 0 5px 0 5px; -} - -/* :::: SEARCH :::: */ -ul.search { - margin: 10px 0 0 20px; - padding: 0; -} - -ul.search li { - padding: 5px 0 5px 20px; - background-image: url(file.png); - background-repeat: no-repeat; - background-position: 0 7px; -} - -ul.search li a { - font-weight: bold; -} - -ul.search li div.context { - color: #888; - margin: 2px 0 0 30px; - text-align: left; -} - -ul.keywordmatches li.goodmatch a { - font-weight: bold; -} - -/* :::: COMMON FORM STYLES :::: */ - -div.actions { - padding: 5px 10px 5px 10px; - border-top: 1px solid #cbe7e5; - border-bottom: 1px solid #cbe7e5; - background-color: #e0f6f4; -} - -form dl { - color: #333; -} - -form dt { - clear: both; - float: left; - min-width: 110px; - margin-right: 10px; - padding-top: 2px; -} - -input#homepage { - display: none; -} - -div.error { - margin: 5px 20px 0 0; - padding: 5px; - border: 1px solid #d00; - 
font-weight: bold; -} - -/* :::: INDEX PAGE :::: */ - -table.contentstable { - width: 90%; -} - -table.contentstable p.biglink { - line-height: 150%; -} - -a.biglink { - font-size: 1.3em; -} - -span.linkdescr { - font-style: italic; - padding-top: 5px; - font-size: 90%; -} - -/* :::: INDEX STYLES :::: */ - -table.indextable td { - text-align: left; - vertical-align: top; -} - -table.indextable dl, table.indextable dd { - margin-top: 0; - margin-bottom: 0; -} - -table.indextable tr.pcap { - height: 10px; -} - -table.indextable tr.cap { - margin-top: 10px; - background-color: #f2f2f2; -} - -img.toggler { - margin-right: 3px; - margin-top: 3px; - cursor: pointer; -} - -form.pfform { - margin: 10px 0 20px 0; -} - -/* :::: GLOBAL STYLES :::: */ - -.docwarning { - background-color: #ffe4e4; - padding: 10px; - margin: 0 -20px 0 -20px; - border-bottom: 1px solid #f66; -} - -p.subhead { - font-weight: bold; - margin-top: 20px; -} - -a { - color: #6e0909; - text-decoration: none; -} - -a:hover { - text-decoration: underline; -} - -div.body h1, -div.body h2, -div.body h3, -div.body h4, -div.body h5, -div.body h6 { - font-family: 'Trebuchet MS', sans-serif; - background-color: #f2f2f2; - font-weight: normal; - color: #331F0A; - border-bottom: 1px solid #ccc; - margin: 20px -20px 10px -20px; - padding: 3px 0 3px 10px; -} - -div.body h1 { margin-top: 0; font-size: 200%; } -div.body h2 { font-size: 160%; } -div.body h3 { font-size: 140%; } -div.body h4 { font-size: 120%; } -div.body h5 { font-size: 110%; } -div.body h6 { font-size: 100%; } - -a.headerlink { - color: #c60f0f; - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; - visibility: hidden; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - visibility: visible; -} - -a.headerlink:hover { - background-color: #c60f0f; - color: white; -} - -div.body p, div.body dd, div.body li { - text-align: justify; - line-height: 130%; -} - -div.body p.caption { - text-align: inherit; -} - -div.body td { - text-align: left; -} - -ul.fakelist { - list-style: none; - margin: 10px 0 10px 20px; - padding: 0; -} - -.field-list ul { - padding-left: 1em; -} - -.first { - margin-top: 0 !important; -} - -/* "Footnotes" heading */ -p.rubric { - margin-top: 30px; - font-weight: bold; -} - -/* Sidebars */ - -div.sidebar { - margin: 0 0 0.5em 1em; - border: 1px solid #ddb; - padding: 7px 7px 0 7px; - background-color: #ffe; - width: 40%; - float: right; -} - -p.sidebar-title { - font-weight: bold; -} - -/* "Topics" */ - -div.topic { - background-color: #eee; - border: 1px solid #ccc; - padding: 7px 7px 0 7px; - margin: 10px 0 10px 0; -} - -p.topic-title { - font-size: 1.1em; - font-weight: bold; - margin-top: 10px; -} - -/* Admonitions */ - -div.admonition { - margin-top: 10px; - margin-bottom: 10px; - padding: 7px; -} - -div.admonition dt { - font-weight: bold; -} - -div.admonition dl { - margin-bottom: 0; -} - -div.admonition p.admonition-title + p { - display: inline; -} - -div.seealso { - background-color: #ffc; - border: 1px solid #ff6; -} - -div.warning { - background-color: #ffe4e4; - border: 1px solid #f66; -} - -div.note { - background-color: #eee; - border: 1px solid #ccc; -} - -p.admonition-title { - margin: 0px 10px 5px 0px; - font-weight: bold; - display: inline; -} - -p.admonition-title:after { - content: ":"; -} - -div.body p.centered { - text-align: center; - margin-top: 25px; -} - -table.docutils { - 
border: 0; -} - -table.docutils td, table.docutils th { - padding: 1px 8px 1px 0; - border-top: 0; - border-left: 0; - border-right: 0; - border-bottom: 1px solid #aaa; -} - -table.field-list td, table.field-list th { - border: 0 !important; -} - -table.footnote td, table.footnote th { - border: 0 !important; -} - -.field-list ul { - margin: 0; - padding-left: 1em; -} - -.field-list p { - margin: 0; -} - -dl { - margin-bottom: 15px; - clear: both; -} - -dd p { - margin-top: 0px; -} - -dd ul, dd table { - margin-bottom: 10px; -} - -dd { - margin-top: 3px; - margin-bottom: 10px; - margin-left: 30px; -} - -.refcount { - color: #060; -} - -dt:target, -.highlight { - background-color: #fbe54e; -} - -dl.glossary dt { - font-weight: bold; - font-size: 1.1em; -} - -th { - text-align: left; - padding-right: 5px; -} - -pre { - padding: 5px; - background-color: #efc; - color: #333; - border: 1px solid #ac9; - border-left: none; - border-right: none; - overflow: auto; -} - -td.linenos pre { - padding: 5px 0px; - border: 0; - background-color: transparent; - color: #aaa; -} - -table.highlighttable { - margin-left: 0.5em; -} - -table.highlighttable td { - padding: 0 0.5em 0 0.5em; -} - -tt { - background-color: #ecf0f3; - padding: 0 1px 0 1px; - font-size: 0.95em; -} - -tt.descname { - background-color: transparent; - font-weight: bold; - font-size: 1.2em; -} - -tt.descclassname { - background-color: transparent; -} - -tt.xref, a tt { - background-color: transparent; - font-weight: bold; -} - -.footnote:target { background-color: #ffa } - -h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt { - background-color: transparent; -} - -.optional { - font-size: 1.3em; -} - -.versionmodified { - font-style: italic; -} - -form.comment { - margin: 0; - padding: 10px 30px 10px 30px; - background-color: #eee; -} - -form.comment h3 { - background-color: #326591; - color: white; - margin: -10px -30px 10px -30px; - padding: 5px; - font-size: 1.4em; -} - -form.comment input, -form.comment textarea { - border: 1px solid #ccc; - padding: 2px; - font-family: sans-serif; - font-size: 100%; -} - -form.comment input[type="text"] { - width: 240px; -} - -form.comment textarea { - width: 100%; - height: 200px; - margin-bottom: 10px; -} - -.system-message { - background-color: #fda; - padding: 5px; - border: 3px solid red; -} - -img.math { - vertical-align: middle; -} - -div.math p { - text-align: center; -} - -span.eqno { - float: right; -} - -img.logo { - border: 0; -} - -/* :::: PRINT :::: */ -@media print { - div.document, - div.documentwrapper, - div.bodywrapper { - margin: 0; - width : 100%; - } - - div.sphinxsidebar, - div.related, - div.footer, - div#comments div.new-comment-box, - #top-link { - display: none; - } -} diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 000000000..a6f6cbda8 --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,16 @@ +{% extends "!layout.html" %} + +{% block footer %} +{{ super() }} + +{% endblock %} diff --git a/docs/_tests/quotes.html b/docs/_tests/quotes.html new file mode 100644 index 000000000..71aff8847 --- /dev/null +++ b/docs/_tests/quotes.html @@ -0,0 +1,281 @@ + + + + + Quotes to Scrape + + + + +
  [... remaining HTML of docs/_tests/quotes.html omitted: a saved "Quotes to Scrape" sample page (ten quote blocks with author, tag and "Top Ten tags" markup) added as a fixture for the documentation tests; the markup was lost in extraction ...]
\ No newline at end of file
diff --git a/docs/_tests/quotes1.html b/docs/_tests/quotes1.html
new file mode 100644
index 000000000..71aff8847
--- /dev/null
+++ b/docs/_tests/quotes1.html
@@ -0,0 +1,281 @@
  [... identical 281-line copy of docs/_tests/quotes.html, likewise omitted ...]
+ + + \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 7acf7c7fa..427c79481 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- -# # Scrapy documentation build configuration file, created by # sphinx-quickstart on Mon Nov 24 12:02:52 2008. # @@ -12,13 +10,14 @@ # serve to show the default. import sys +from datetime import datetime from os import path # If your extensions are in another directory, add it here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. sys.path.append(path.join(path.dirname(__file__), "_ext")) -sys.path.append(path.join(path.dirname(path.dirname(__file__)), "scrapy")) +sys.path.insert(0, path.dirname(path.dirname(__file__))) # General configuration @@ -26,7 +25,15 @@ sys.path.append(path.join(path.dirname(path.dirname(__file__)), "scrapy")) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['scrapydocs'] +extensions = [ + 'hoverxref.extension', + 'notfound.extension', + 'scrapydocs', + 'sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'sphinx.ext.intersphinx', + 'sphinx.ext.viewcode', +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -41,8 +48,8 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = u'Scrapy' -copyright = u'2008-2013, Scrapy developers' +project = 'Scrapy' +copyright = '2008–{}, Scrapy developers'.format(datetime.now().year) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -70,6 +77,8 @@ language = 'en' # List of documents that shouldn't be included in the build. #unused_docs = [] +exclude_patterns = ['build'] + # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = ['.build'] @@ -91,14 +100,33 @@ exclude_trees = ['.build'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' +# List of Sphinx warnings that will not be raised +suppress_warnings = ['epub.unknown_project_files'] + # Options for HTML output # ----------------------- +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# Add path to the RTD explicitly to robustify builds (otherwise might +# fail in a clean Debian build env) +import sphinx_rtd_theme +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + + # The style sheet to use for HTML and HTML Help pages. A file of that name # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. -html_style = 'scrapydoc.css' +# html_style = 'scrapydoc.css' # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". @@ -125,10 +153,6 @@ html_static_path = ['_static'] # using the given strftime format. html_last_updated_fmt = '%b %d, %Y' -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. 
-html_use_smartypants = True - # Custom sidebar templates, maps document names to template names. #html_sidebars = {} @@ -172,8 +196,8 @@ htmlhelp_basename = 'Scrapydoc' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). latex_documents = [ - ('index', 'Scrapy.tex', ur'Scrapy Documentation', - ur'Scrapy developers', 'manual'), + ('index', 'Scrapy.tex', 'Scrapy Documentation', + 'Scrapy developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -203,3 +227,94 @@ linkcheck_ignore = [ 'http://localhost:\d+', 'http://hg.scrapy.org', 'http://directory.google.com/' ] + + +# Options for the Coverage extension +# ---------------------------------- +coverage_ignore_pyobjects = [ + # Contract’s add_pre_hook and add_post_hook are not documented because + # they should be transparent to contract developers, for whom pre_hook and + # post_hook should be the actual concern. + r'\bContract\.add_(pre|post)_hook$', + + # ContractsManager is an internal class, developers are not expected to + # interact with it directly in any way. + r'\bContractsManager\b$', + + # For default contracts we only want to document their general purpose in + # their __init__ method, the methods they reimplement to achieve that purpose + # should be irrelevant to developers using those contracts. + r'\w+Contract\.(adjust_request_args|(pre|post)_process)$', + + # Methods of downloader middlewares are not documented, only the classes + # themselves, since downloader middlewares are controlled through Scrapy + # settings. + r'^scrapy\.downloadermiddlewares\.\w*?\.(\w*?Middleware|DownloaderStats)\.', + + # Base classes of downloader middlewares are implementation details that + # are not meant for users. + r'^scrapy\.downloadermiddlewares\.\w*?\.Base\w*?Middleware', + + # Private exception used by the command-line interface implementation. + r'^scrapy\.exceptions\.UsageError', + + # Methods of BaseItemExporter subclasses are only documented in + # BaseItemExporter. + r'^scrapy\.exporters\.(?!BaseItemExporter\b)\w*?\.', + + # Extension behavior is only modified through settings. Methods of + # extension classes, as well as helper functions, are implementation + # details that are not documented. + r'^scrapy\.extensions\.[a-z]\w*?\.[A-Z]\w*?\.', # methods + r'^scrapy\.extensions\.[a-z]\w*?\.[a-z]', # helper functions + + # Never documented before, and deprecated now. 
+ r'^scrapy\.item\.DictItem$', + r'^scrapy\.linkextractors\.FilteringLinkExtractor$', + + # Implementation detail of LxmlLinkExtractor + r'^scrapy\.linkextractors\.lxmlhtml\.LxmlParserLinkExtractor', +] + + +# Options for the InterSphinx extension +# ------------------------------------- + +intersphinx_mapping = { + 'attrs': ('https://www.attrs.org/en/stable/', None), + 'coverage': ('https://coverage.readthedocs.io/en/stable', None), + 'cssselect': ('https://cssselect.readthedocs.io/en/latest', None), + 'itemloaders': ('https://itemloaders.readthedocs.io/en/latest/', None), + 'pytest': ('https://docs.pytest.org/en/latest', None), + 'python': ('https://docs.python.org/3', None), + 'sphinx': ('https://www.sphinx-doc.org/en/master', None), + 'tox': ('https://tox.readthedocs.io/en/latest', None), + 'twisted': ('https://twistedmatrix.com/documents/current', None), + 'twistedapi': ('https://twistedmatrix.com/documents/current/api', None), +} + + +# Options for sphinx-hoverxref options +# ------------------------------------ + +hoverxref_auto_ref = True +hoverxref_role_types = { + "class": "tooltip", + "confval": "tooltip", + "hoverxref": "tooltip", + "mod": "tooltip", + "ref": "tooltip", +} +hoverxref_roles = ['command', 'reqmeta', 'setting', 'signal'] + + +def setup(app): + app.connect('autodoc-skip-member', maybe_skip_member) + + +def maybe_skip_member(app, what, name, obj, skip, options): + if not skip: + # autodocs was generating a text "alias of" for the following members + # https://github.com/sphinx-doc/sphinx/issues/4422 + return name in {'default_item_class', 'default_selector_class'} + return skip diff --git a/docs/conftest.py b/docs/conftest.py new file mode 100644 index 000000000..8c735e838 --- /dev/null +++ b/docs/conftest.py @@ -0,0 +1,29 @@ +import os +from doctest import ELLIPSIS, NORMALIZE_WHITESPACE + +from scrapy.http.response.html import HtmlResponse +from sybil import Sybil +from sybil.parsers.codeblock import CodeBlockParser +from sybil.parsers.doctest import DocTestParser +from sybil.parsers.skip import skip + + +def load_response(url, filename): + input_path = os.path.join(os.path.dirname(__file__), '_tests', filename) + with open(input_path, 'rb') as input_file: + return HtmlResponse(url, body=input_file.read()) + + +def setup(namespace): + namespace['load_response'] = load_response + + +pytest_collect_file = Sybil( + parsers=[ + DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE), + CodeBlockParser(future_imports=['print_function']), + skip, + ], + pattern='*.rst', + setup=setup, +).pytest() diff --git a/docs/contributing.rst b/docs/contributing.rst index d7a47a746..525ad3497 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -4,22 +4,31 @@ Contributing to Scrapy ====================== +.. important:: + + Double check that you are reading the most recent version of this document at + https://docs.scrapy.org/en/master/contributing.html + There are many ways to contribute to Scrapy. Here are some of them: * Blog about Scrapy. Tell the world how you're using Scrapy. This will help - newcomers with more examples and the Scrapy project to increase its + newcomers with more examples and will help the Scrapy project to increase its visibility. * Report bugs and request features in the `issue tracker`_, trying to follow the guidelines detailed in `Reporting bugs`_ below. -* Submit patches for new functionality and/or bug fixes. 
Please read - `Writing patches`_ and `Submitting patches`_ below for details on how to +* Submit patches for new functionalities and/or bug fixes. Please read + :ref:`writing-patches` and `Submitting patches`_ below for details on how to write and submit a patch. -* Join the `scrapy-users`_ mailing list and share your ideas on how to +* Join the `Scrapy subreddit`_ and share your ideas on how to improve Scrapy. We're always open to suggestions. +* Answer Scrapy questions at + `Stack Overflow `__. + + Reporting bugs ============== @@ -30,33 +39,48 @@ Reporting bugs trusted Scrapy developers, and its archives are not public. Well-written bug reports are very helpful, so keep in mind the following -guidelines when reporting a new bug. +guidelines when you're going to report a new bug. * check the :ref:`FAQ ` first to see if your issue is addressed in a well-known question -* check the `open issues`_ to see if it has already been reported. If it has, - don't dismiss the report but check the ticket history and comments, you may - find additional useful information to contribute. +* if you have a general question about Scrapy usage, please ask it at + `Stack Overflow `__ + (use "scrapy" tag). -* search the `scrapy-users`_ list to see if it has been discussed there, or - if you're not sure if what you're seeing is a bug. You can also ask in the - `#scrapy` IRC channel. +* check the `open issues`_ to see if the issue has already been reported. If it + has, don't dismiss the report, but check the ticket history and comments. If + you have additional useful information, please leave a comment, or consider + :ref:`sending a pull request ` with a fix. -* write complete, reproducible, specific bug reports. The smaller the test +* search the `scrapy-users`_ list and `Scrapy subreddit`_ to see if it has + been discussed there, or if you're not sure if what you're seeing is a bug. + You can also ask in the ``#scrapy`` IRC channel. + +* write **complete, reproducible, specific bug reports**. The smaller the test case, the better. Remember that other developers won't have your project to reproduce the bug, so please include all relevant files required to reproduce - it. + it. See for example StackOverflow's guide on creating a + `Minimal, Complete, and Verifiable example`_ exhibiting the issue. + +* the most awesome way to provide a complete reproducible example is to + send a pull request which adds a failing test case to the + Scrapy testing suite (see :ref:`submitting-patches`). + This is helpful even if you don't have an intention to + fix the issue yourselves. * include the output of ``scrapy version -v`` so developers working on your bug know exactly which version and platform it occurred on, which is often very helpful for reproducing it, or knowing if it was already fixed. +.. _Minimal, Complete, and Verifiable example: https://stackoverflow.com/help/mcve + +.. _writing-patches: + Writing patches =============== -The better written a patch is, the higher chance that it'll get accepted and -the sooner that will be merged. +The better a patch is written, the higher the chances that it'll get accepted and the sooner it will be merged. Well-written patches should: @@ -75,10 +99,26 @@ Well-written patches should: the documentation changes in the same patch. See `Documentation policies`_ below. +* if you're adding a private API, please add a regular expression to the + ``coverage_ignore_pyobjects`` variable of ``docs/conf.py`` to exclude the new + private API from documentation coverage checks. 
+ + To see if your private API is skipped properly, generate a documentation + coverage report as follows:: + + tox -e docs-coverage + +* if you are removing deprecated code, first make sure that at least 1 year + (12 months) has passed since the release that introduced the deprecation. + See :ref:`deprecation-policy`. + + +.. _submitting-patches: + Submitting patches ================== -The best way to submit a patch is to issue a `pull request`_ on Github, +The best way to submit a patch is to issue a `pull request`_ on GitHub, optionally creating a new issue first. Remember to explain what was fixed or the new functionality (what it is, why @@ -88,15 +128,41 @@ developers to understand and accept your patch. You can also discuss the new functionality (or bug fix) before creating the patch, but it's always good to have a patch ready to illustrate your arguments and show that you have put some additional thought into the subject. A good -starting point is to send a pull request on Github. It can be simple enough to +starting point is to send a pull request on GitHub. It can be simple enough to illustrate your idea, and leave documentation/tests for later, after the idea -has been validated and proven useful. Alternatively, you can send an email to -`scrapy-users`_ to discuss your idea first. +has been validated and proven useful. Alternatively, you can start a +conversation in the `Scrapy subreddit`_ to discuss your idea first. + +Sometimes there is an existing pull request for the problem you'd like to +solve, which is stalled for some reason. Often the pull request is in a +right direction, but changes are requested by Scrapy maintainers, and the +original pull request author hasn't had time to address them. +In this case consider picking up this pull request: open +a new pull request with all commits from the original pull request, as well as +additional changes to address the raised issues. Doing so helps a lot; it is +not considered rude as soon as the original author is acknowledged by keeping +his/her commits. + +You can pull an existing pull request to a local branch +by running ``git fetch upstream pull/$PR_NUMBER/head:$BRANCH_NAME_TO_CREATE`` +(replace 'upstream' with a remote name for scrapy repository, +``$PR_NUMBER`` with an ID of the pull request, and ``$BRANCH_NAME_TO_CREATE`` +with a name of the branch you want to create locally). +See also: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/checking-out-pull-requests-locally#modifying-an-inactive-pull-request-locally. + +When writing GitHub pull requests, try to keep titles short but descriptive. +E.g. For bug #411: "Scrapy hangs if an exception raises in start_requests" +prefer "Fix hanging when exception occurs in start_requests (#411)" +instead of "Fix for #411". Complete titles make it easy to skim through +the issue tracker. Finally, try to keep aesthetic changes (:pep:`8` compliance, unused imports -removal, etc) in separate commits than functional changes. This will make pull +removal, etc) in separate commits from functional changes. This will make pull requests easier to review and more likely to get merged. + +.. _coding-style: + Coding style ============ @@ -105,50 +171,84 @@ Scrapy: * Unless otherwise specified, follow :pep:`8`. -* It's OK to use lines longer than 80 chars if it improves the code +* It's OK to use lines longer than 79 chars if it improves the code readability. -* Don't put your name in the code you contribute. 
Our policy is to keep - the contributor's name in the `AUTHORS`_ file distributed with Scrapy. +* Don't put your name in the code you contribute; git provides enough + metadata to identify author of the code. + See https://help.github.com/en/github/using-git/setting-your-username-in-git for + setup instructions. -Scrapy Contrib -============== - -Scrapy contrib shares a similar rationale as Django contrib, which is explained -in `this post `_. If you -are working on a new functionality, please follow that rationale to decide -whether it should be a Scrapy contrib. If unsure, you can ask in -`scrapy-users`_. +.. _documentation-policies: Documentation policies ====================== -* **Don't** use docstrings for documenting classes, or methods which are - already documented in the official (sphinx) documentation. For example, the - :meth:`ItemLoader.add_value` method should be documented in the sphinx - documentation, not its docstring. +For reference documentation of API members (classes, methods, etc.) use +docstrings and make sure that the Sphinx documentation uses the +:mod:`~sphinx.ext.autodoc` extension to pull the docstrings. API reference +documentation should follow docstring conventions (`PEP 257`_) and be +IDE-friendly: short, to the point, and it may provide short examples. -* **Do** use docstrings for documenting functions not present in the official - (sphinx) documentation, such as functions from ``scrapy.utils`` package and - its sub-modules. +Other types of documentation, such as tutorials or topics, should be covered in +files within the ``docs/`` directory. This includes documentation that is +specific to an API member, but goes beyond API reference documentation. + +In any case, if something is covered in a docstring, use the +:mod:`~sphinx.ext.autodoc` extension to pull the docstring into the +documentation instead of duplicating the docstring in files within the +``docs/`` directory. Tests ===== -Tests are implemented using the `Twisted unit-testing framework`_, running -tests requires `tox`_. +Tests are implemented using the :doc:`Twisted unit-testing framework +`. Running tests requires +:doc:`tox `. + +.. _running-tests: Running tests ------------- -To run all tests go to the root directory of Scrapy source code and run: +To run all tests:: - ``tox`` + tox -To run a specific test (say ``tests/test_contrib_loader.py``) use: +To run a specific test (say ``tests/test_loader.py``) use: - ``tox -- tests/test_contrib_loader.py`` + ``tox -- tests/test_loader.py`` +To run the tests on a specific :doc:`tox ` environment, use +``-e `` with an environment name from ``tox.ini``. For example, to run +the tests with Python 3.6 use:: + + tox -e py36 + +You can also specify a comma-separated list of environments, and use :ref:`tox’s +parallel mode ` to run the tests on multiple environments in +parallel:: + + tox -e py36,py38 -p auto + +To pass command-line options to :doc:`pytest `, add them after +``--`` in your call to :doc:`tox `. Using ``--`` overrides the +default positional arguments defined in ``tox.ini``, so you must include those +default positional arguments (``scrapy tests``) after ``--`` as well:: + + tox -- scrapy tests -x # stop after first failure + +You can also use the `pytest-xdist`_ plugin. 
For example, to run all tests on +the Python 3.6 :doc:`tox ` environment using all your CPU cores:: + + tox -e py36 -- scrapy tests -n auto + +To see coverage report install :doc:`coverage ` +(``pip install coverage``) and run: + + ``coverage report`` + +see output of ``coverage --help`` for more options like html or xml report. Writing tests ------------- @@ -161,17 +261,18 @@ Scrapy uses unit-tests, which are located in the `tests/`_ directory. Their module name typically resembles the full path of the module they're testing. For example, the item loaders code is in:: - scrapy.contrib.loader + scrapy.loader And their unit-tests are in:: - tests/test_contrib_loader.py + tests/test_loader.py .. _issue tracker: https://github.com/scrapy/scrapy/issues -.. _scrapy-users: http://groups.google.com/group/scrapy-users -.. _Twisted unit-testing framework: http://twistedmatrix.com/documents/current/core/development/policy/test-standard.html +.. _scrapy-users: https://groups.google.com/forum/#!forum/scrapy-users +.. _Scrapy subreddit: https://reddit.com/r/scrapy .. _AUTHORS: https://github.com/scrapy/scrapy/blob/master/AUTHORS .. _tests/: https://github.com/scrapy/scrapy/tree/master/tests .. _open issues: https://github.com/scrapy/scrapy/issues -.. _pull request: http://help.github.com/send-pull-requests/ -.. _tox: https://pypi.python.org/pypi/tox +.. _PEP 257: https://www.python.org/dev/peps/pep-0257/ +.. _pull request: https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request +.. _pytest-xdist: https://github.com/pytest-dev/pytest-xdist diff --git a/docs/experimental/index.rst b/docs/experimental/index.rst deleted file mode 100644 index 1c019c396..000000000 --- a/docs/experimental/index.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. _experimental: - -Experimental features -===================== - -This section documents experimental Scrapy features that may become stable in -future releases, but whose API is not yet stable. Use them with caution, and -subscribe to the `mailing lists `_ to get -notified of any changes. - -Since it's not revised so frequently, this section may contain documentation -which is outdated, incomplete or overlapping with stable documentation (until -it's properly merged) . Use at your own risk. - -.. warning:: - - This documentation is a work in progress. Use at your own risk. - -Add commands using external libraries -------------------------------------- - -You can also add Scrapy commands from an external library by adding `scrapy.commands` section into entry_points in the `setup.py`. - -The following example adds `my_command` command:: - - from setuptools import setup, find_packages - - setup(name='scrapy-mymodule', - entry_points={ - 'scrapy.commands': [ - 'my_command=my_scrapy_module.commands:MyCommand', - ], - }, - ) diff --git a/docs/faq.rst b/docs/faq.rst index 47bfede71..9346ec358 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -3,6 +3,8 @@ Frequently Asked Questions ========================== +.. _faq-scrapy-bs-cmp: + How does Scrapy compare to BeautifulSoup or lxml? ------------------------------------------------- @@ -19,33 +21,56 @@ Python code. In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like comparing `jinja2`_ to `Django`_. -.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/ -.. _lxml: http://lxml.de/ -.. _jinja2: http://jinja.pocoo.org/2/ -.. _Django: http://www.djangoproject.com +.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/ +.. _lxml: https://lxml.de/ +.. 
_jinja2: https://palletsprojects.com/p/jinja/ +.. _Django: https://www.djangoproject.com/ -.. _faq-python-versions: +Can I use Scrapy with BeautifulSoup? +------------------------------------ -What Python versions does Scrapy support? ------------------------------------------ +Yes, you can. +As mentioned :ref:`above `, `BeautifulSoup`_ can be used +for parsing HTML responses in Scrapy callbacks. +You just have to feed the response's body into a ``BeautifulSoup`` object +and extract whatever data you need from it. -Scrapy is supported under Python 2.7 only. -Python 2.6 support was dropped starting at Scrapy 0.20. +Here's an example spider using BeautifulSoup API, with ``lxml`` as the HTML parser:: -Does Scrapy work with Python 3? ---------------------------------- -No, but there are plans to support Python 3.3+. -At the moment, Scrapy works with Python 2.7. + from bs4 import BeautifulSoup + import scrapy + + + class ExampleSpider(scrapy.Spider): + name = "example" + allowed_domains = ["example.com"] + start_urls = ( + 'http://www.example.com/', + ) + + def parse(self, response): + # use lxml to get decent HTML parsing speed + soup = BeautifulSoup(response.text, 'lxml') + yield { + "url": response.url, + "title": soup.h1.string + } + +.. note:: + + ``BeautifulSoup`` supports several HTML/XML parsers. + See `BeautifulSoup's official documentation`_ on which ones are available. + +.. _BeautifulSoup's official documentation: https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use -.. seealso:: :ref:`faq-python-versions`. Did Scrapy "steal" X from Django? --------------------------------- Probably, but we don't like that word. We think Django_ is a great open source project and an example to follow, so we've used it as an inspiration for -Scrapy. +Scrapy. We believe that, if something is already done well, there's no need to reinvent it. This concept, besides being one of the foundations for open source and free @@ -57,14 +82,12 @@ focus on the real problems we need to solve. We'd be proud if Scrapy serves as an inspiration for other projects. Feel free to steal from us! -.. _Django: http://www.djangoproject.com - Does Scrapy work with HTTP proxies? ----------------------------------- Yes. Support for HTTP proxies is provided (since Scrapy 0.8) through the HTTP Proxy downloader middleware. See -:class:`~scrapy.contrib.downloadermiddleware.httpproxy.HttpProxyMiddleware`. +:class:`~scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware`. How can I scrape an item with attributes in different pages? ------------------------------------------------------------ @@ -77,25 +100,37 @@ Scrapy crashes with: ImportError: No module named win32api You need to install `pywin32`_ because of `this Twisted bug`_. -.. _pywin32: http://sourceforge.net/projects/pywin32/ -.. _this Twisted bug: http://twistedmatrix.com/trac/ticket/3707 +.. _pywin32: https://sourceforge.net/projects/pywin32/ +.. _this Twisted bug: https://twistedmatrix.com/trac/ticket/3707 How can I simulate a user login in my spider? --------------------------------------------- See :ref:`topics-request-response-ref-request-userlogin`. +.. _faq-bfo-dfo: + Does Scrapy crawl in breadth-first or depth-first order? -------------------------------------------------------- By default, Scrapy uses a `LIFO`_ queue for storing pending requests, which basically means that it crawls in `DFO order`_. This order is more convenient -in most cases. If you do want to crawl in true `BFO order`_, you can do it by +in most cases. 
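A quick way to observe the actual crawl order is to log the depth that the
(enabled by default) depth middleware records for each response. The spider
below is only an illustrative sketch (the name and start URL are arbitrary),
not something shipped with Scrapy::

    import scrapy

    class OrderCheckSpider(scrapy.Spider):
        # Hypothetical spider used only to observe in which order pages
        # are visited.
        name = 'order_check'
        start_urls = ['http://quotes.toscrape.com/']

        def parse(self, response):
            # DepthMiddleware stores the link depth in the request/response meta.
            depth = response.meta.get('depth', 0)
            self.logger.info('Visited %s at depth %d', response.url, depth)
            for href in response.css('a::attr(href)').getall():
                yield response.follow(href, callback=self.parse)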
+ +If you do want to crawl in true `BFO order`_, you can do it by setting the following settings:: DEPTH_PRIORITY = 1 - SCHEDULER_DISK_QUEUE = 'scrapy.squeue.PickleFifoDiskQueue' - SCHEDULER_MEMORY_QUEUE = 'scrapy.squeue.FifoMemoryQueue' + SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleFifoDiskQueue' + SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.FifoMemoryQueue' + +While pending requests are below the configured values of +:setting:`CONCURRENT_REQUESTS`, :setting:`CONCURRENT_REQUESTS_PER_DOMAIN` or +:setting:`CONCURRENT_REQUESTS_PER_IP`, those requests are sent +concurrently. As a result, the first few requests of a crawl rarely follow the +desired order. Lowering those settings to ``1`` enforces the desired order, but +it significantly slows down the crawl as a whole. + My Scrapy crawler has memory leaks. What can I do? -------------------------------------------------- @@ -113,7 +148,7 @@ See previous question. Can I use Basic HTTP Authentication in my spiders? -------------------------------------------------- -Yes, see :class:`~scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware`. +Yes, see :class:`~scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware`. Why does Scrapy download pages in English instead of my native language? ------------------------------------------------------------------------ @@ -121,7 +156,7 @@ Why does Scrapy download pages in English instead of my native language? Try changing the default `Accept-Language`_ request header by overriding the :setting:`DEFAULT_REQUEST_HEADERS` setting. -.. _Accept-Language: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 +.. _Accept-Language: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 Where can I find some example Scrapy projects? ---------------------------------------------- @@ -144,23 +179,23 @@ I get "Filtered offsite request" messages. How can I fix them? Those messages (logged with ``DEBUG`` level) don't necessarily mean there is a problem, so you may not need to fix them. -Those message are thrown by the Offsite Spider Middleware, which is a spider +Those messages are thrown by the Offsite Spider Middleware, which is a spider middleware (enabled by default) whose purpose is to filter out requests to domains outside the ones covered by the spider. For more info see: -:class:`~scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware`. +:class:`~scrapy.spidermiddlewares.offsite.OffsiteMiddleware`. What is the recommended way to deploy a Scrapy crawler in production? --------------------------------------------------------------------- -See :ref:`topics-scrapyd`. +See :ref:`topics-deploy`. Can I use JSON for large exports? --------------------------------- It'll depend on how large your output is. See :ref:`this warning -` in :class:`~scrapy.contrib.exporter.JsonItemExporter` +` in :class:`~scrapy.exporters.JsonItemExporter` documentation. Can I return (Twisted) deferreds from signal handlers? @@ -190,7 +225,7 @@ Or by setting a global download delay in your project with the Can I call ``pdb.set_trace()`` from my spiders to debug them? ------------------------------------------------------------- -Yes, but you can also use the Scrapy shell which allows you too quickly analyze +Yes, but you can also use the Scrapy shell which allows you to quickly analyze (and even modify) the response being processed by your spider, which is, quite often, more useful than plain old ``pdb.set_trace()``. @@ -201,15 +236,15 @@ Simplest way to dump all my scraped items into a JSON/CSV/XML file? 
To dump into a JSON file:: - scrapy crawl myspider -o items.json + scrapy crawl myspider -O items.json To dump into a CSV file:: - scrapy crawl myspider -o items.csv + scrapy crawl myspider -O items.csv To dump into a XML file:: - scrapy crawl myspider -o items.xml + scrapy crawl myspider -O items.xml For more information see :ref:`topics-feed-exports` @@ -220,8 +255,8 @@ The ``__VIEWSTATE`` parameter is used in sites built with ASP.NET/VB.NET. For more info on how it works see `this page`_. Also, here's an `example spider`_ which scrapes one of these sites. -.. _this page: http://search.cpan.org/~ecarroll/HTML-TreeBuilderX-ASP_NET-0.09/lib/HTML/TreeBuilderX/ASP_NET.pm -.. _example spider: http://github.com/AmbientLighter/rpn-fas/blob/master/fas/spiders/rnp.py +.. _this page: https://metacpan.org/pod/release/ECARROLL/HTML-TreeBuilderX-ASP_NET-0.09/lib/HTML/TreeBuilderX/ASP_NET.pm +.. _example spider: https://github.com/AmbientLighter/rpn-fas/blob/master/fas/spiders/rnp.py What's the best way to parse big XML/CSV data feeds? ---------------------------------------------------- @@ -280,38 +315,63 @@ I'm scraping a XML document and my XPath selector doesn't return any items You may need to remove namespaces. See :ref:`removing-namespaces`. +.. _faq-split-item: -I'm getting an error: "cannot import name crawler" +How to split an item into multiple items in an item pipeline? +------------------------------------------------------------- + +:ref:`Item pipelines ` cannot yield multiple items per +input item. :ref:`Create a spider middleware ` +instead, and use its +:meth:`~scrapy.spidermiddlewares.SpiderMiddleware.process_spider_output` +method for this purpose. For example:: + + from copy import deepcopy + + from itemadapter import is_item, ItemAdapter + + class MultiplyItemsMiddleware: + + def process_spider_output(self, response, result, spider): + for item in result: + if is_item(item): + adapter = ItemAdapter(item) + for _ in range(adapter['multiply_by']): + yield deepcopy(item) + +Does Scrapy support IPv6 addresses? +----------------------------------- + +Yes, by setting :setting:`DNS_RESOLVER` to ``scrapy.resolver.CachingHostnameResolver``. +Note that by doing so, you lose the ability to set a specific timeout for DNS requests +(the value of the :setting:`DNS_TIMEOUT` setting is ignored). + + +.. _faq-specific-reactor: + +How to deal with ``: filedescriptor out of range in select()`` exceptions? +---------------------------------------------------------------------------------------------- + +This issue `has been reported`_ to appear when running broad crawls in macOS, where the default +Twisted reactor is :class:`twisted.internet.selectreactor.SelectReactor`. Switching to a +different reactor is possible by using the :setting:`TWISTED_REACTOR` setting. + + +.. _faq-stop-response-download: + +How can I cancel the download of a given response? -------------------------------------------------- -This is caused by Scrapy changes due to the singletons removal. The error is -most likely raised by a module (extension, middleware, pipeline or spider) in -your Scrapy project that imports ``crawler`` from ``scrapy.project``. For -example:: +In some situations, it might be useful to stop the download of a certain response. +For instance, if you only need the first part of a large response and you would like +to save resources by avoiding the download of the whole body. 
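A rough sketch of the approach described in the next paragraph (a hypothetical
spider that keeps only the first received chunk of each response; the spider
name and URL are placeholders)::

    import scrapy
    from scrapy import signals
    from scrapy.exceptions import StopDownload

    class StopEarlySpider(scrapy.Spider):
        name = 'stop_early'
        start_urls = ['https://example.com/']

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            crawler.signals.connect(spider.on_bytes_received,
                                    signal=signals.bytes_received)
            return spider

        def on_bytes_received(self, data, request, spider):
            # fail=False keeps the partial body and still calls the callback.
            raise StopDownload(fail=False)

        def parse(self, response):
            self.logger.info('Received %d bytes from %s',
                             len(response.body), response.url)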
+In that case, you could attach a handler to the :class:`~scrapy.signals.bytes_received` +signal and raise a :exc:`~scrapy.exceptions.StopDownload` exception. Please refer to +the :ref:`topics-stop-response-download` topic for additional information and examples. - from scrapy.project import crawler - class SomeExtension(object): - def __init__(self): - self.crawler = crawler - # ... - -This way to access the crawler object is deprecated, the code should be ported -to use ``from_crawler`` class method, for example:: - - class SomeExtension(object): - - @classmethod - def from_crawler(cls, crawler): - o = cls() - o.crawler = crawler - return o - -Scrapy command line tool has some backwards compatibility in place to support -the old import mechanism (with a deprecation warning), but this mechanism may -not work if you use Scrapy differently (for example, as a library). - -.. _user agents: http://en.wikipedia.org/wiki/User_agent -.. _LIFO: http://en.wikipedia.org/wiki/LIFO -.. _DFO order: http://en.wikipedia.org/wiki/Depth-first_search -.. _BFO order: http://en.wikipedia.org/wiki/Breadth-first_search +.. _has been reported: https://github.com/scrapy/scrapy/issues/2905 +.. _user agents: https://en.wikipedia.org/wiki/User_agent +.. _LIFO: https://en.wikipedia.org/wiki/Stack_(abstract_data_type) +.. _DFO order: https://en.wikipedia.org/wiki/Depth-first_search +.. _BFO order: https://en.wikipedia.org/wiki/Breadth-first_search diff --git a/docs/index.rst b/docs/index.rst index 2a1ae037b..11aa5c9be 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,13 @@ Scrapy |version| documentation ============================== -This documentation contains everything you need to know about Scrapy. +Scrapy is a fast high-level `web crawling`_ and `web scraping`_ framework, used +to crawl websites and extract structured data from their pages. It can be used +for a wide range of purposes, from data mining to monitoring and automated +testing. + +.. _web crawling: https://en.wikipedia.org/wiki/Web_crawler +.. _web scraping: https://en.wikipedia.org/wiki/Web_scraping Getting help ============ @@ -13,13 +19,15 @@ Having trouble? We'd like to help! * Try the :doc:`FAQ ` -- it's got answers to some common questions. * Looking for specific information? Try the :ref:`genindex` or :ref:`modindex`. -* Search for information in the `archives of the scrapy-users mailing list`_, or - `post a question`_. -* Ask a question in the `#scrapy IRC channel`_. +* Ask or search questions in `StackOverflow using the scrapy tag`_. +* Ask or search questions in the `Scrapy subreddit`_. +* Search for questions on the archives of the `scrapy-users mailing list`_. +* Ask a question in the `#scrapy IRC channel`_, * Report bugs with Scrapy in our `issue tracker`_. -.. _archives of the scrapy-users mailing list: http://groups.google.com/group/scrapy-users/ -.. _post a question: http://groups.google.com/group/scrapy-users/ +.. _scrapy-users mailing list: https://groups.google.com/forum/#!forum/scrapy-users +.. _Scrapy subreddit: https://www.reddit.com/r/scrapy/ +.. _StackOverflow using the scrapy tag: https://stackoverflow.com/tags/scrapy .. _#scrapy IRC channel: irc://irc.freenode.net/scrapy .. _issue tracker: https://github.com/scrapy/scrapy/issues @@ -28,6 +36,7 @@ First steps =========== .. toctree:: + :caption: First steps :hidden: intro/overview @@ -53,24 +62,26 @@ Basic concepts ============== .. 
toctree:: + :caption: Basic concepts :hidden: topics/commands - topics/items topics/spiders topics/selectors + topics/items topics/loaders topics/shell topics/item-pipeline topics/feed-exports + topics/request-response topics/link-extractors + topics/settings + topics/exceptions + :doc:`topics/commands` Learn about the command-line tool used to manage your Scrapy project. -:doc:`topics/items` - Define the data you want to scrape. - :doc:`topics/spiders` Write the rules to crawl your websites. @@ -80,6 +91,9 @@ Basic concepts :doc:`topics/shell` Test your extraction code in an interactive environment. +:doc:`topics/items` + Define the data you want to scrape. + :doc:`topics/loaders` Populate your items with the extracted data. @@ -89,13 +103,24 @@ Basic concepts :doc:`topics/feed-exports` Output your scraped data using different formats and storages. +:doc:`topics/request-response` + Understand the classes used to represent HTTP requests and responses. + :doc:`topics/link-extractors` Convenient classes to extract links to follow from pages. +:doc:`topics/settings` + Learn how to configure Scrapy and see all :ref:`available settings `. + +:doc:`topics/exceptions` + See all available exceptions and their meaning. + + Built-in services ================= .. toctree:: + :caption: Built-in services :hidden: topics/logging @@ -105,8 +130,8 @@ Built-in services topics/webservice :doc:`topics/logging` - Understand the simple logging facility provided by Scrapy. - + Learn how to use Python's builtin logging on Scrapy. + :doc:`topics/stats` Collect statistics about your scraping crawler. @@ -124,6 +149,7 @@ Solving specific problems ========================= .. toctree:: + :caption: Solving specific problems :hidden: faq @@ -131,22 +157,22 @@ Solving specific problems topics/contracts topics/practices topics/broad-crawls - topics/firefox - topics/firebug + topics/developer-tools + topics/dynamic-content topics/leaks - topics/images - topics/ubuntu - topics/scrapyd + topics/media-pipeline + topics/deploy topics/autothrottle topics/benchmarking topics/jobs - topics/djangoitem + topics/coroutines + topics/asyncio :doc:`faq` Get answers to most frequently asked questions. :doc:`topics/debug` - Learn how to debug common problems of your scrapy spider. + Learn how to debug common problems of your Scrapy spider. :doc:`topics/contracts` Learn how to use contracts for testing your spiders. @@ -157,23 +183,20 @@ Solving specific problems :doc:`topics/broad-crawls` Tune Scrapy for crawling a lot domains in parallel. -:doc:`topics/firefox` - Learn how to scrape with Firefox and some useful add-ons. +:doc:`topics/developer-tools` + Learn how to scrape with your browser's developer tools. -:doc:`topics/firebug` - Learn how to scrape efficiently using Firebug. +:doc:`topics/dynamic-content` + Read webpage data that is loaded dynamically. :doc:`topics/leaks` Learn how to find and get rid of memory leaks in your crawler. -:doc:`topics/images` - Download static images associated with your scraped items. +:doc:`topics/media-pipeline` + Download files and/or images associated with your scraped items. -:doc:`topics/ubuntu` - Install latest Scrapy packages easily on Ubuntu - -:doc:`topics/scrapyd` - Deploying your Scrapy project in production. +:doc:`topics/deploy` + Deploying your Scrapy spiders and run them in a remote server. :doc:`topics/autothrottle` Adjust crawl rate dynamically based on load. @@ -184,8 +207,11 @@ Solving specific problems :doc:`topics/jobs` Learn how to pause and resume crawls for large spiders. 
-:doc:`topics/djangoitem` - Write scraped items using Django models. +:doc:`topics/coroutines` + Use the :ref:`coroutine syntax `. + +:doc:`topics/asyncio` + Use :mod:`asyncio` and :mod:`asyncio`-powered libraries. .. _extending-scrapy: @@ -193,6 +219,7 @@ Extending Scrapy ================ .. toctree:: + :caption: Extending Scrapy :hidden: topics/architecture @@ -200,6 +227,9 @@ Extending Scrapy topics/spider-middleware topics/extensions topics/api + topics/signals + topics/exporters + :doc:`topics/architecture` Understand the Scrapy architecture. @@ -216,33 +246,9 @@ Extending Scrapy :doc:`topics/api` Use it on extensions and middlewares to extend Scrapy functionality -Reference -========= - -.. toctree:: - :hidden: - - topics/request-response - topics/settings - topics/signals - topics/exceptions - topics/exporters - -:doc:`topics/commands` - Learn about the command-line tool and see all :ref:`available commands `. - -:doc:`topics/request-response` - Understand the classes used to represent HTTP requests and responses. - -:doc:`topics/settings` - Learn how to configure Scrapy and see all :ref:`available settings `. - :doc:`topics/signals` See all available signals and how to work with them. -:doc:`topics/exceptions` - See all available exceptions and their meaning. - :doc:`topics/exporters` Quickly export your scraped items to a file (XML, CSV, etc). @@ -251,12 +257,12 @@ All the rest ============ .. toctree:: + :caption: All the rest :hidden: news contributing versioning - experimental/index :doc:`news` See what has changed in recent Scrapy versions. @@ -266,6 +272,3 @@ All the rest :doc:`versioning` Understand Scrapy versioning and API stability. - -:doc:`experimental/index` - Learn about bleeding-edge features. diff --git a/docs/intro/examples.rst b/docs/intro/examples.rst index 40a124679..96363c7d5 100644 --- a/docs/intro/examples.rst +++ b/docs/intro/examples.rst @@ -5,21 +5,16 @@ Examples ======== The best way to learn is with examples, and Scrapy is no exception. For this -reason, there is an example Scrapy project named dirbot_, that you can use to -play and learn more about Scrapy. It contains the dmoz spider described in the -tutorial. +reason, there is an example Scrapy project named quotesbot_, that you can use to +play and learn more about Scrapy. It contains two spiders for +http://quotes.toscrape.com, one using CSS selectors and another one using XPath +expressions. -This dirbot_ project is available at: https://github.com/scrapy/dirbot - -It contains a README file with a detailed description of the project contents. +The quotesbot_ project is available at: https://github.com/scrapy/quotesbot. +You can find more information about it in the project's README. If you're familiar with git, you can checkout the code. Otherwise you can -download a tarball or zip file of the project by clicking on `Downloads`_. +download the project as a zip file by clicking +`here `_. -The `scrapy tag on Snipplr`_ is used for sharing code snippets such as spiders, -middlewares, extensions, or scripts. Feel free (and encouraged!) to share any -code there. - -.. _dirbot: https://github.com/scrapy/dirbot -.. _Downloads: https://github.com/scrapy/dirbot/archives/master -.. _scrapy tag on Snipplr: http://snipplr.com/all/tags/scrapy/ +.. _quotesbot: https://github.com/scrapy/quotesbot diff --git a/docs/intro/install.rst b/docs/intro/install.rst index ffba0e2b3..6d65ae2ee 100644 --- a/docs/intro/install.rst +++ b/docs/intro/install.rst @@ -4,90 +4,271 @@ Installation guide ================== +.. 
_faq-python-versions: + +Supported Python versions +========================= + +Scrapy requires Python 3.5.2+, either the CPython implementation (default) or +the PyPy 5.9+ implementation (see :ref:`python:implementations`). + + Installing Scrapy ================= -.. note:: Check :ref:`intro-install-platform-notes` first. +If you're using `Anaconda`_ or `Miniconda`_, you can install the package from +the `conda-forge`_ channel, which has up-to-date packages for Linux, Windows +and macOS. -The installation steps assume that you have the following things installed: +To install Scrapy using ``conda``, run:: -* `Python`_ 2.7 + conda install -c conda-forge scrapy -* `pip`_ and `setuptools`_ Python packages. Nowadays `pip`_ requires and - installs `setuptools`_ if not installed. +Alternatively, if you’re already familiar with installation of Python packages, +you can install Scrapy and its dependencies from PyPI with:: -* `lxml`_. Most Linux distributions ships prepackaged versions of lxml. - Otherwise refer to http://lxml.de/installation.html + pip install Scrapy -* `OpenSSL`_. This comes preinstalled in all operating systems, except Windows - where the Python installer ships it bundled. +Note that sometimes this may require solving compilation issues for some Scrapy +dependencies depending on your operating system, so be sure to check the +:ref:`intro-install-platform-notes`. -You can install Scrapy using pip (which is the canonical way to install Python -packages). +We strongly recommend that you install Scrapy in :ref:`a dedicated virtualenv `, +to avoid conflicting with your system packages. -To install using pip:: +For more detailed and platform specifics instructions, as well as +troubleshooting information, read on. + + +Things that are good to know +---------------------------- + +Scrapy is written in pure Python and depends on a few key Python packages (among others): + +* `lxml`_, an efficient XML and HTML parser +* `parsel`_, an HTML/XML data extraction library written on top of lxml, +* `w3lib`_, a multi-purpose helper for dealing with URLs and web page encodings +* `twisted`_, an asynchronous networking framework +* `cryptography`_ and `pyOpenSSL`_, to deal with various network-level security needs + +The minimal versions which Scrapy is tested against are: + +* Twisted 14.0 +* lxml 3.4 +* pyOpenSSL 0.14 + +Scrapy may work with older versions of these packages +but it is not guaranteed it will continue working +because it’s not being tested against them. + +Some of these packages themselves depends on non-Python packages +that might require additional installation steps depending on your platform. +Please check :ref:`platform-specific guides below `. + +In case of any trouble related to these dependencies, +please refer to their respective installation instructions: + +* `lxml installation`_ +* `cryptography installation`_ + +.. _lxml installation: https://lxml.de/installation.html +.. _cryptography installation: https://cryptography.io/en/latest/installation/ + + +.. _intro-using-virtualenv: + +Using a virtual environment (recommended) +----------------------------------------- + +TL;DR: We recommend installing Scrapy inside a virtual environment +on all platforms. + +Python packages can be installed either globally (a.k.a system wide), +or in user-space. We do not recommend installing Scrapy system wide. + +Instead, we recommend that you install Scrapy within a so-called +"virtual environment" (:mod:`venv`). 
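For instance, the following standard-library-only sketch creates such an
environment and installs Scrapy into it (equivalent to running ``python -m
venv`` and ``pip install`` from a shell; the directory name is arbitrary)::

    import subprocess
    import venv

    env_dir = 'scrapy-env'  # any directory you like
    venv.EnvBuilder(with_pip=True).create(env_dir)

    # Use the environment's own interpreter so Scrapy lands inside it
    # (on Windows the interpreter is under 'Scripts' instead of 'bin').
    subprocess.check_call([f'{env_dir}/bin/python', '-m', 'pip',
                           'install', 'Scrapy'])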
+Virtual environments allow you to not conflict with already-installed Python +system packages (which could break some of your system tools and scripts), +and still install packages normally with ``pip`` (without ``sudo`` and the likes). + +See :ref:`tut-venv` on how to create your virtual environment. + +Once you have created a virtual environment, you can install Scrapy inside it with ``pip``, +just like any other Python package. +(See :ref:`platform-specific guides ` +below for non-Python dependencies that you may need to install beforehand). - pip install Scrapy .. _intro-install-platform-notes: Platform specific installation notes ==================================== +.. _intro-install-windows: + Windows ------- -* Install Python 2.7 from http://python.org/download/ +Though it's possible to install Scrapy on Windows using pip, we recommend you +to install `Anaconda`_ or `Miniconda`_ and use the package from the +`conda-forge`_ channel, which will avoid most installation issues. - You need to adjust ``PATH`` environment variable to include paths to - the Python executable and additional scripts. The following paths need to be - added to ``PATH``:: +Once you've installed `Anaconda`_ or `Miniconda`_, install Scrapy with:: - C:\Python2.7\;C:\Python2.7\Scripts\; + conda install -c conda-forge scrapy - To update the ``PATH`` open a Command prompt and run:: - c:\python27\python.exe c:\python27\tools\scripts\win_add2path.py +.. _intro-install-ubuntu: - Close the command prompt window and reopen it so changes take effect, run the - following command and check it shows the expected Python version:: +Ubuntu 14.04 or above +--------------------- - python --version - -* Install `pip`_ from https://pip.pypa.io/en/latest/installing.html - - Now open a Command prompt to check ``pip`` is installed correctly:: - - pip --version - -* At this point Python 2.7 and ``pip`` package manager must be working, let's - install Scrapy:: - - pip install Scrapy - -Ubuntu 9.10 or above -~~~~~~~~~~~~~~~~~~~~ +Scrapy is currently tested with recent-enough versions of lxml, +twisted and pyOpenSSL, and is compatible with recent Ubuntu distributions. +But it should support older versions of Ubuntu too, like Ubuntu 14.04, +albeit with potential issues with TLS connections. **Don't** use the ``python-scrapy`` package provided by Ubuntu, they are typically too old and slow to catch up with latest Scrapy. -Instead, use the official :ref:`Ubuntu Packages `, which already -solve all dependencies for you and are continuously updated with the latest bug -fixes. -Archlinux -~~~~~~~~~ +To install Scrapy on Ubuntu (or Ubuntu-based) systems, you need to install +these dependencies:: -You can follow the generic instructions or install Scrapy from `AUR Scrapy package`:: + sudo apt-get install python3 python3-dev python3-pip libxml2-dev libxslt1-dev zlib1g-dev libffi-dev libssl-dev - yaourt -S scrapy +- ``python3-dev``, ``zlib1g-dev``, ``libxml2-dev`` and ``libxslt1-dev`` + are required for ``lxml`` +- ``libssl-dev`` and ``libffi-dev`` are required for ``cryptography`` + +Inside a :ref:`virtualenv `, +you can install Scrapy with ``pip`` after that:: + + pip install scrapy + +.. note:: + The same non-Python dependencies can be used to install Scrapy in Debian + Jessie (8.0) and above. -.. _Python: http://www.python.org -.. _pip: http://www.pip-installer.org/en/latest/installing.html -.. _easy_install: http://pypi.python.org/pypi/setuptools -.. 
_Control Panel: http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/sysdm_advancd_environmnt_addchange_variable.mspx -.. _lxml: http://lxml.de/ -.. _OpenSSL: https://pypi.python.org/pypi/pyOpenSSL +.. _intro-install-macos: + +macOS +----- + +Building Scrapy's dependencies requires the presence of a C compiler and +development headers. On macOS this is typically provided by Apple’s Xcode +development tools. To install the Xcode command line tools open a terminal +window and run:: + + xcode-select --install + +There's a `known issue `_ that +prevents ``pip`` from updating system packages. This has to be addressed to +successfully install Scrapy and its dependencies. Here are some proposed +solutions: + +* *(Recommended)* **Don't** use system python, install a new, updated version + that doesn't conflict with the rest of your system. Here's how to do it using + the `homebrew`_ package manager: + + * Install `homebrew`_ following the instructions in https://brew.sh/ + + * Update your ``PATH`` variable to state that homebrew packages should be + used before system packages (Change ``.bashrc`` to ``.zshrc`` accordantly + if you're using `zsh`_ as default shell):: + + echo "export PATH=/usr/local/bin:/usr/local/sbin:$PATH" >> ~/.bashrc + + * Reload ``.bashrc`` to ensure the changes have taken place:: + + source ~/.bashrc + + * Install python:: + + brew install python + + * Latest versions of python have ``pip`` bundled with them so you won't need + to install it separately. If this is not the case, upgrade python:: + + brew update; brew upgrade python + +* *(Optional)* :ref:`Install Scrapy inside a Python virtual environment + `. + + This method is a workaround for the above macOS issue, but it's an overall + good practice for managing dependencies and can complement the first method. + +After any of these workarounds you should be able to install Scrapy:: + + pip install Scrapy + + +PyPy +---- + +We recommend using the latest PyPy version. The version tested is 5.9.0. +For PyPy3, only Linux installation was tested. + +Most Scrapy dependencides now have binary wheels for CPython, but not for PyPy. +This means that these dependecies will be built during installation. +On macOS, you are likely to face an issue with building Cryptography dependency, +solution to this problem is described +`here `_, +that is to ``brew install openssl`` and then export the flags that this command +recommends (only needed when installing Scrapy). Installing on Linux has no special +issues besides installing build dependencies. +Installing Scrapy with PyPy on Windows is not tested. + +You can check that Scrapy is installed correctly by running ``scrapy bench``. +If this command gives errors such as +``TypeError: ... got 2 unexpected keyword arguments``, this means +that setuptools was unable to pick up one PyPy-specific dependency. +To fix this issue, run ``pip install 'PyPyDispatcher>=2.1.0'``. + + +.. 
_intro-install-troubleshooting: + +Troubleshooting +=============== + +AttributeError: 'module' object has no attribute 'OP_NO_TLSv1_1' +---------------------------------------------------------------- + +After you install or upgrade Scrapy, Twisted or pyOpenSSL, you may get an +exception with the following traceback:: + + […] + File "[…]/site-packages/twisted/protocols/tls.py", line 63, in + from twisted.internet._sslverify import _setAcceptableProtocols + File "[…]/site-packages/twisted/internet/_sslverify.py", line 38, in + TLSVersion.TLSv1_1: SSL.OP_NO_TLSv1_1, + AttributeError: 'module' object has no attribute 'OP_NO_TLSv1_1' + +The reason you get this exception is that your system or virtual environment +has a version of pyOpenSSL that your version of Twisted does not support. + +To install a version of pyOpenSSL that your version of Twisted supports, +reinstall Twisted with the :code:`tls` extra option:: + + pip install twisted[tls] + +For details, see `Issue #2473 `_. + +.. _Python: https://www.python.org/ +.. _pip: https://pip.pypa.io/en/latest/installing/ +.. _lxml: https://lxml.de/index.html +.. _parsel: https://pypi.org/project/parsel/ +.. _w3lib: https://pypi.org/project/w3lib/ +.. _twisted: https://twistedmatrix.com/trac/ +.. _cryptography: https://cryptography.io/en/latest/ +.. _pyOpenSSL: https://pypi.org/project/pyOpenSSL/ .. _setuptools: https://pypi.python.org/pypi/setuptools .. _AUR Scrapy package: https://aur.archlinux.org/packages/scrapy/ +.. _homebrew: https://brew.sh/ +.. _zsh: https://www.zsh.org/ +.. _Scrapinghub: https://scrapinghub.com +.. _Anaconda: https://docs.anaconda.com/anaconda/ +.. _Miniconda: https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html +.. _conda-forge: https://conda-forge.org/ diff --git a/docs/intro/overview.rst b/docs/intro/overview.rst index 289e975b8..dd80c7bd0 100644 --- a/docs/intro/overview.rst +++ b/docs/intro/overview.rst @@ -8,177 +8,90 @@ Scrapy is an application framework for crawling web sites and extracting structured data which can be used for a wide range of useful applications, like data mining, information processing or historical archival. -Even though Scrapy was originally designed for `screen scraping`_ (more -precisely, `web scraping`_), it can also be used to extract data using APIs -(such as `Amazon Associates Web Services`_) or as a general purpose web -crawler. +Even though Scrapy was originally designed for `web scraping`_, it can also be +used to extract data using APIs (such as `Amazon Associates Web Services`_) or +as a general purpose web crawler. -The purpose of this document is to introduce you to the concepts behind Scrapy -so you can get an idea of how it works and decide if Scrapy is what you need. -When you're ready to start a project, you can :ref:`start with the tutorial -`. +Walk-through of an example spider +================================= -Pick a website -============== +In order to show you what Scrapy brings to the table, we'll walk you through an +example of a Scrapy Spider using the simplest way to run a spider. -So you need to extract some information from a website, but the website doesn't -provide any API or mechanism to access that info programmatically. Scrapy can -help you extract that information. - -Let's say we want to extract the URL, name, description and size of all torrent -files added today in the `Mininova`_ site. - -The list of all torrents added today can be found on this page: - - http://www.mininova.org/today - -.. 
_intro-overview-item: - -Define the data you want to scrape -================================== - -The first thing is to define the data we want to scrape. In Scrapy, this is -done through :ref:`Scrapy Items ` (Torrent files, in this case). - -This would be our Item:: +Here's the code for a spider that scrapes famous quotes from website +http://quotes.toscrape.com, following the pagination:: import scrapy - class TorrentItem(scrapy.Item): - url = scrapy.Field() - name = scrapy.Field() - description = scrapy.Field() - size = scrapy.Field() -Write a Spider to extract the data -================================== + class QuotesSpider(scrapy.Spider): + name = 'quotes' + start_urls = [ + 'http://quotes.toscrape.com/tag/humor/', + ] -The next thing is to write a Spider which defines the start URL -(http://www.mininova.org/today), the rules for following links and the rules -for extracting the data from pages. + def parse(self, response): + for quote in response.css('div.quote'): + yield { + 'author': quote.xpath('span/small/text()').get(), + 'text': quote.css('span.text::text').get(), + } -If we take a look at that page content we'll see that all torrent URLs are like -``http://www.mininova.org/tor/NUMBER`` where ``NUMBER`` is an integer. We'll use -that to construct the regular expression for the links to follow: ``/tor/\d+``. + next_page = response.css('li.next a::attr("href")').get() + if next_page is not None: + yield response.follow(next_page, self.parse) -We'll use `XPath`_ for selecting the data to extract from the web page HTML -source. Let's take one of those torrent pages: +Put this in a text file, name it to something like ``quotes_spider.py`` +and run the spider using the :command:`runspider` command:: - http://www.mininova.org/tor/2676093 + scrapy runspider quotes_spider.py -o quotes.jl -And look at the page HTML source to construct the XPath to select the data we -want which is: torrent name, description and size. +When this finishes you will have in the ``quotes.jl`` file a list of the +quotes in JSON Lines format, containing text and author, looking like this:: -.. highlight:: html - -By looking at the page HTML source we can see that the file name is contained -inside a ``

<h1>`` tag::
-
-   <h1>Darwin - The Evolution Of An Exhibition</h1>
-
-.. highlight:: none
-
-An XPath expression to extract the name could be::
-
-    //h1/text()
-
-.. highlight:: html
-
-And the description is contained inside a ``<div>`` tag with ``id="description"``::
-
-   <h2>Description:</h2>
-
-   <div id="description">
-   Short documentary made for Plymouth City Museum and Art Gallery regarding the setup of an exhibit about Charles Darwin in conjunction with the 200th anniversary of his birth.
-
-   ...
-
-.. highlight:: none
-
-An XPath expression to select the description could be::
-
-    //div[@id='description']
-
-.. highlight:: html
-
-Finally, the file size is contained in the second ``<p>`` tag inside the ``<div>``
-tag with ``id=specifications``::
-
-   <div id="specifications">
-
-   <p>
-   Category:
-   Movies > Documentary
-   </p>
-
-   <p>
-   Total size:
-   150.62 megabyte</p>
+ {"author": "Jane Austen", "text": "\u201cThe person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.\u201d"} + {"author": "Steve Martin", "text": "\u201cA day without sunshine is like, you know, night.\u201d"} + {"author": "Garrison Keillor", "text": "\u201cAnyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.\u201d"} + ... -.. highlight:: none +What just happened? +------------------- -An XPath expression to select the file size could be:: +When you ran the command ``scrapy runspider quotes_spider.py``, Scrapy looked for a +Spider definition inside it and ran it through its crawler engine. - //div[@id='specifications']/p[2]/text()[2] +The crawl started by making requests to the URLs defined in the ``start_urls`` +attribute (in this case, only the URL for quotes in *humor* category) +and called the default callback method ``parse``, passing the response object as +an argument. In the ``parse`` callback, we loop through the quote elements +using a CSS Selector, yield a Python dict with the extracted quote text and author, +look for a link to the next page and schedule another request using the same +``parse`` method as callback. -.. highlight:: python +Here you notice one of the main advantages about Scrapy: requests are +:ref:`scheduled and processed asynchronously `. This +means that Scrapy doesn't need to wait for a request to be finished and +processed, it can send another request or do other things in the meantime. This +also means that other requests can keep going even if some request fails or an +error happens while handling it. -For more information about XPath see the `XPath reference`_. +While this enables you to do very fast crawls (sending multiple concurrent +requests at the same time, in a fault-tolerant way) Scrapy also gives you +control over the politeness of the crawl through :ref:`a few settings +`. You can do things like setting a download delay between +each request, limiting amount of concurrent requests per domain or per IP, and +even :ref:`using an auto-throttling extension ` that tries +to figure out these automatically. -Finally, here's the spider code:: +.. note:: - from scrapy.contrib.spiders import CrawlSpider, Rule - from scrapy.contrib.linkextractors import LinkExtractor + This is using :ref:`feed exports ` to generate the + JSON file, you can easily change the export format (XML or CSV, for example) or the + storage backend (FTP or `Amazon S3`_, for example). You can also write an + :ref:`item pipeline ` to store the items in a database. - class MininovaSpider(CrawlSpider): - - name = 'mininova' - allowed_domains = ['mininova.org'] - start_urls = ['http://www.mininova.org/today'] - rules = [Rule(LinkExtractor(allow=['/tor/\d+']), 'parse_torrent')] - - def parse_torrent(self, response): - torrent = TorrentItem() - torrent['url'] = response.url - torrent['name'] = response.xpath("//h1/text()").extract() - torrent['description'] = response.xpath("//div[@id='description']").extract() - torrent['size'] = response.xpath("//div[@id='info-left']/p[2]/text()[2]").extract() - return torrent - -The ``TorrentItem`` class is :ref:`defined above `. - -Run the spider to extract the data -================================== - -Finally, we'll run the spider to crawl the site and output the file -``scraped_data.json`` with the scraped data in JSON format:: - - scrapy crawl mininova -o scraped_data.json - -This uses :ref:`feed exports ` to generate the JSON file. 
-You can easily change the export format (XML or CSV, for example) or the -storage backend (FTP or `Amazon S3`_, for example). - -You can also write an :ref:`item pipeline ` to store the -items in a database very easily. - -Review scraped data -=================== - -If you check the ``scraped_data.json`` file after the process finishes, you'll -see the scraped items there:: - - [{"url": "http://www.mininova.org/tor/2676093", "name": ["Darwin - The Evolution Of An Exhibition"], "description": ["Short documentary made for Plymouth ..."], "size": ["150.62 megabyte"]}, - # ... other items ... - ] - -You'll notice that all field values (except for the ``url`` which was assigned -directly) are actually lists. This is because the :ref:`selectors -` return lists. You may want to store single values, or -perform some additional parsing/cleansing to the values. That's what -:ref:`Item Loaders ` are for. .. _topics-whatelse: @@ -190,77 +103,53 @@ this is just the surface. Scrapy provides a lot of powerful features for making scraping easy and efficient, such as: * Built-in support for :ref:`selecting and extracting ` data - from HTML and XML sources + from HTML/XML sources using extended CSS selectors and XPath expressions, + with helper methods to extract using regular expressions. -* Built-in support for cleaning and sanitizing the scraped data using a - collection of reusable filters (called :ref:`Item Loaders `) - shared between all the spiders. +* An :ref:`interactive shell console ` (IPython aware) for trying + out the CSS and XPath expressions to scrape data, very useful when writing or + debugging your spiders. * Built-in support for :ref:`generating feed exports ` in multiple formats (JSON, CSV, XML) and storing them in multiple backends (FTP, S3, local filesystem) -* A media pipeline for :ref:`automatically downloading images ` - (or any other media) associated with the scraped items - -* Support for :ref:`extending Scrapy ` by plugging - your own functionality using :ref:`signals ` and a - well-defined API (middlewares, :ref:`extensions `, and - :ref:`pipelines `). - -* Wide range of built-in middlewares and extensions for: - - * cookies and session handling - * HTTP compression - * HTTP authentication - * HTTP cache - * user-agent spoofing - * robots.txt - * crawl depth restriction - * and more - * Robust encoding support and auto-detection, for dealing with foreign, non-standard and broken encoding declarations. -* Support for creating spiders based on pre-defined templates, to speed up - spider creation and make their code more consistent on large projects. See - :command:`genspider` command for more details. +* :ref:`Strong extensibility support `, allowing you to plug + in your own functionality using :ref:`signals ` and a + well-defined API (middlewares, :ref:`extensions `, and + :ref:`pipelines `). -* Extensible :ref:`stats collection ` for multiple spider - metrics, useful for monitoring the performance of your spiders and detecting - when they get broken +* Wide range of built-in extensions and middlewares for handling: -* An :ref:`Interactive shell console ` for trying XPaths, very - useful for writing and debugging your spiders - -* A :ref:`System service ` designed to ease the deployment and - run of your spiders in production. 
+ - cookies and session handling + - HTTP features like compression, authentication, caching + - user-agent spoofing + - robots.txt + - crawl depth restriction + - and more * A :ref:`Telnet console ` for hooking into a Python console running inside your Scrapy process, to introspect and debug your crawler -* :ref:`Logging ` facility that you can hook on to for catching - errors during the scraping process. - -* Support for crawling based on URLs discovered through `Sitemaps`_ - -* A caching DNS resolver +* Plus other goodies like reusable spiders to crawl sites from `Sitemaps`_ and + XML/CSV feeds, a media pipeline for :ref:`automatically downloading images + ` (or any other media) associated with the scraped + items, a caching DNS resolver, and much more! What's next? ============ -The next obvious steps are for you to `download Scrapy`_, read :ref:`the -tutorial ` and join `the community`_. Thanks for your +The next steps for you are to :ref:`install Scrapy `, +:ref:`follow through the tutorial ` to learn how to create +a full-blown Scrapy project and `join the community`_. Thanks for your interest! -.. _download Scrapy: http://scrapy.org/download/ -.. _the community: http://scrapy.org/community/ -.. _screen scraping: http://en.wikipedia.org/wiki/Screen_scraping -.. _web scraping: http://en.wikipedia.org/wiki/Web_scraping -.. _Amazon Associates Web Services: http://aws.amazon.com/associates/ -.. _Mininova: http://www.mininova.org -.. _XPath: http://www.w3.org/TR/xpath -.. _XPath reference: http://www.w3.org/TR/xpath -.. _Amazon S3: http://aws.amazon.com/s3/ -.. _Sitemaps: http://www.sitemaps.org +.. _join the community: https://scrapy.org/community/ +.. _web scraping: https://en.wikipedia.org/wiki/Web_scraping +.. _Amazon Associates Web Services: https://affiliate-program.amazon.com/gp/advertising/api/detail/main.html +.. _Amazon S3: https://aws.amazon.com/s3/ +.. _Sitemaps: https://www.sitemaps.org/index.html diff --git a/docs/intro/tutorial.rst b/docs/intro/tutorial.rst index a4248d7aa..f96c78887 100644 --- a/docs/intro/tutorial.rst +++ b/docs/intro/tutorial.rst @@ -7,447 +7,752 @@ Scrapy Tutorial In this tutorial, we'll assume that Scrapy is already installed on your system. If that's not the case, see :ref:`intro-install`. -We are going to use `Open directory project (dmoz) `_ as -our example domain to scrape. +We are going to scrape `quotes.toscrape.com `_, a website +that lists quotes from famous authors. This tutorial will walk you through these tasks: 1. Creating a new Scrapy project -2. Defining the Items you will extract -3. Writing a :ref:`spider ` to crawl a site and extract - :ref:`Items ` -4. Writing an :ref:`Item Pipeline ` to store the - extracted Items +2. Writing a :ref:`spider ` to crawl a site and extract data +3. Exporting the scraped data using the command line +4. Changing spider to recursively follow links +5. Using spider arguments Scrapy is written in Python_. If you're new to the language you might want to start by getting an idea of what the language is like, to get the most out of -Scrapy. If you're already familiar with other languages, and want to learn -Python quickly, we recommend `Learn Python The Hard Way`_. If you're new to programming -and want to start with Python, take a look at `this list of Python resources -for non-programmers`_. +Scrapy. + +If you're already familiar with other languages, and want to learn Python quickly, the `Python Tutorial`_ is a good resource. 
+ +If you're new to programming and want to start with Python, the following books +may be useful to you: + +* `Automate the Boring Stuff With Python`_ + +* `How To Think Like a Computer Scientist`_ + +* `Learn Python 3 The Hard Way`_ + +You can also take a look at `this list of Python resources for non-programmers`_, +as well as the `suggested resources in the learnpython-subreddit`_. + +.. _Python: https://www.python.org/ +.. _this list of Python resources for non-programmers: https://wiki.python.org/moin/BeginnersGuide/NonProgrammers +.. _Python Tutorial: https://docs.python.org/3/tutorial +.. _Automate the Boring Stuff With Python: https://automatetheboringstuff.com/ +.. _How To Think Like a Computer Scientist: http://openbookproject.net/thinkcs/python/english3e/ +.. _Learn Python 3 The Hard Way: https://learnpythonthehardway.org/python3/ +.. _suggested resources in the learnpython-subreddit: https://www.reddit.com/r/learnpython/wiki/index#wiki_new_to_python.3F -.. _Python: http://www.python.org -.. _this list of Python resources for non-programmers: http://wiki.python.org/moin/BeginnersGuide/NonProgrammers -.. _Learn Python The Hard Way: http://learnpythonthehardway.org/book/ Creating a project ================== -Before you start scraping, you will have set up a new Scrapy project. Enter a -directory where you'd like to store your code and then run:: +Before you start scraping, you will have to set up a new Scrapy project. Enter a +directory where you'd like to store your code and run:: scrapy startproject tutorial This will create a ``tutorial`` directory with the following contents:: tutorial/ - scrapy.cfg - tutorial/ + scrapy.cfg # deploy configuration file + + tutorial/ # project's Python module, you'll import your code from here __init__.py - items.py - pipelines.py - settings.py - spiders/ + + items.py # project items definition file + + middlewares.py # project middlewares file + + pipelines.py # project pipelines file + + settings.py # project settings file + + spiders/ # a directory where you'll later put your spiders __init__.py - ... -These are basically: - -* ``scrapy.cfg``: the project configuration file -* ``tutorial/``: the project's python module, you'll later import your code from - here. -* ``tutorial/items.py``: the project's items file. -* ``tutorial/pipelines.py``: the project's pipelines file. -* ``tutorial/settings.py``: the project's settings file. -* ``tutorial/spiders/``: a directory where you'll later put your spiders. - -Defining our Item -================= - -`Items` are containers that will be loaded with the scraped data; they work -like simple python dicts but provide additional protection against populating -undeclared fields, to prevent typos. - -They are declared by creating a :class:`scrapy.Item ` class and defining -its attributes as :class:`scrapy.Field ` objects, like you will in an ORM -(don't worry if you're not familiar with ORMs, you will see that this is an -easy task). - -We begin by modeling the item that we will use to hold the sites data obtained -from dmoz.org, as we want to capture the name, url and description of the -sites, we define fields for each of these three attributes. To do that, we edit -``items.py``, found in the ``tutorial`` directory. 
Our Item class looks like this:: - - import scrapy - - class DmozItem(scrapy.Item): - title = scrapy.Field() - link = scrapy.Field() - desc = scrapy.Field() - -This may seem complicated at first, but defining the item allows you to use other handy -components of Scrapy that need to know how your item looks. Our first Spider ================ -Spiders are user-written classes used to scrape information from a domain (or group -of domains). +Spiders are classes that you define and that Scrapy uses to scrape information +from a website (or a group of websites). They must subclass +:class:`~scrapy.spiders.Spider` and define the initial requests to make, +optionally how to follow links in the pages, and how to parse the downloaded +page content to extract data. -They define an initial list of URLs to download, how to follow links, and how -to parse the contents of those pages to extract :ref:`items `. - -To create a Spider, you must subclass :class:`scrapy.Spider ` and -define the three main mandatory attributes: - -* :attr:`~scrapy.spider.Spider.name`: identifies the Spider. It must be - unique, that is, you can't set the same name for different Spiders. - -* :attr:`~scrapy.spider.Spider.start_urls`: is a list of URLs where the - Spider will begin to crawl from. So, the first pages downloaded will be those - listed here. The subsequent URLs will be generated successively from data - contained in the start URLs. - -* :meth:`~scrapy.spider.Spider.parse` is a method of the spider, which will - be called with the downloaded :class:`~scrapy.http.Response` object of each - start URL. The response is passed to the method as the first and only - argument. - - This method is responsible for parsing the response data and extracting - scraped data (as scraped items) and more URLs to follow. - - The :meth:`~scrapy.spider.Spider.parse` method is in charge of processing - the response and returning scraped data (as :class:`~scrapy.item.Item` - objects) and more URLs to follow (as :class:`~scrapy.http.Request` objects). - -This is the code for our first Spider; save it in a file named -``dmoz_spider.py`` under the ``tutorial/spiders`` directory:: +This is the code for our first Spider. Save it in a file named +``quotes_spider.py`` under the ``tutorial/spiders`` directory in your project:: import scrapy - class DmozSpider(scrapy.Spider): - name = "dmoz" - allowed_domains = ["dmoz.org"] - start_urls = [ - "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/", - "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/" - ] + + class QuotesSpider(scrapy.Spider): + name = "quotes" + + def start_requests(self): + urls = [ + 'http://quotes.toscrape.com/page/1/', + 'http://quotes.toscrape.com/page/2/', + ] + for url in urls: + yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): - filename = response.url.split("/")[-2] + page = response.url.split("/")[-2] + filename = 'quotes-%s.html' % page with open(filename, 'wb') as f: f.write(response.body) + self.log('Saved file %s' % filename) -Crawling --------- + +As you can see, our Spider subclasses :class:`scrapy.Spider ` +and defines some attributes and methods: + +* :attr:`~scrapy.spiders.Spider.name`: identifies the Spider. It must be + unique within a project, that is, you can't set the same name for different + Spiders. 
+
+* :meth:`~scrapy.spiders.Spider.start_requests`: must return an iterable of
+  Requests (you can return a list of requests or write a generator function)
+  which the Spider will begin to crawl from. Subsequent requests will be
+  generated successively from these initial requests.
+
+* :meth:`~scrapy.spiders.Spider.parse`: a method that will be called to handle
+  the response downloaded for each of the requests made. The response parameter
+  is an instance of :class:`~scrapy.http.TextResponse` that holds
+  the page content and has further helpful methods to handle it.
+
+  The :meth:`~scrapy.spiders.Spider.parse` method usually parses the response, extracting
+  the scraped data as dicts and also finding new URLs to
+  follow and creating new requests (:class:`~scrapy.http.Request`) from them.
+
+How to run our spider
+---------------------

 To put our spider to work, go to the project's top level directory and run::

-    scrapy crawl dmoz
+    scrapy crawl quotes

-The ``crawl dmoz`` command runs the spider for the ``dmoz.org`` domain. You
-will get an output similar to this::
+This command runs the spider with name ``quotes`` that we've just added, that
+will send some requests for the ``quotes.toscrape.com`` domain. You will get an output
+similar to this::

-    2014-01-23 18:13:07-0400 [scrapy] INFO: Scrapy started (bot: tutorial)
-    2014-01-23 18:13:07-0400 [scrapy] INFO: Optional features available: ...
-    2014-01-23 18:13:07-0400 [scrapy] INFO: Overridden settings: {}
-    2014-01-23 18:13:07-0400 [scrapy] INFO: Enabled extensions: ...
-    2014-01-23 18:13:07-0400 [scrapy] INFO: Enabled downloader middlewares: ...
-    2014-01-23 18:13:07-0400 [scrapy] INFO: Enabled spider middlewares: ...
-    2014-01-23 18:13:07-0400 [scrapy] INFO: Enabled item pipelines: ...
-    2014-01-23 18:13:07-0400 [dmoz] INFO: Spider opened
-    2014-01-23 18:13:08-0400 [dmoz] DEBUG: Crawled (200) (referer: None)
-    2014-01-23 18:13:09-0400 [dmoz] DEBUG: Crawled (200) (referer: None)
-    2014-01-23 18:13:09-0400 [dmoz] INFO: Closing spider (finished)
+    ... (omitted for brevity)
+    2016-12-16 21:24:05 [scrapy.core.engine] INFO: Spider opened
+    2016-12-16 21:24:05 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
+    2016-12-16 21:24:05 [scrapy.extensions.telnet] DEBUG: Telnet console listening on 127.0.0.1:6023
+    2016-12-16 21:24:05 [scrapy.core.engine] DEBUG: Crawled (404) <GET http://quotes.toscrape.com/robots.txt> (referer: None)
+    2016-12-16 21:24:05 [scrapy.core.engine] DEBUG: Crawled (200) <GET http://quotes.toscrape.com/page/1/> (referer: None)
+    2016-12-16 21:24:05 [scrapy.core.engine] DEBUG: Crawled (200) <GET http://quotes.toscrape.com/page/2/> (referer: None)
+    2016-12-16 21:24:05 [quotes] DEBUG: Saved file quotes-1.html
+    2016-12-16 21:24:05 [quotes] DEBUG: Saved file quotes-2.html
+    2016-12-16 21:24:05 [scrapy.core.engine] INFO: Closing spider (finished)
+    ...

-Pay attention to the lines containing ``[dmoz]``, which corresponds to our
-spider. You can see a log line for each URL defined in ``start_urls``. Because
-these URLs are the starting ones, they have no referrers, which is shown at the
-end of the log line, where it says ``(referer: None)``.
+Now, check the files in the current directory. You should notice that two new
+files have been created: *quotes-1.html* and *quotes-2.html*, with the content
+for the respective URLs, as our ``parse`` method instructs.
+
+.. note:: If you are wondering why we haven't parsed the HTML yet, hold
+   on, we will cover that soon.
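For readers who prefer to drive a crawl from a plain Python script rather than the command line, Scrapy also provides :class:`~scrapy.crawler.CrawlerProcess`. The following is only an illustrative sketch, not part of the diff above: it re-uses the spider defined earlier, and the ``LOG_LEVEL`` setting is an arbitrary choice::

    import scrapy
    from scrapy.crawler import CrawlerProcess


    class QuotesSpider(scrapy.Spider):
        name = "quotes"

        def start_requests(self):
            urls = [
                'http://quotes.toscrape.com/page/1/',
                'http://quotes.toscrape.com/page/2/',
            ]
            for url in urls:
                yield scrapy.Request(url=url, callback=self.parse)

        def parse(self, response):
            page = response.url.split("/")[-2]
            filename = 'quotes-%s.html' % page
            with open(filename, 'wb') as f:
                f.write(response.body)
            self.log('Saved file %s' % filename)


    # CrawlerProcess starts a Twisted reactor, runs the crawl and blocks
    # until it finishes; the LOG_LEVEL value is just an illustrative setting.
    process = CrawlerProcess(settings={"LOG_LEVEL": "INFO"})
    process.crawl(QuotesSpider)
    process.start()

Saved as, say, ``run_quotes.py`` (a hypothetical filename) and executed with ``python run_quotes.py``, this should produce the same ``quotes-1.html`` and ``quotes-2.html`` files as the command-line run.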
-But more interesting, as our ``parse`` method instructs, two files have been -created: *Books* and *Resources*, with the content of both URLs. What just happened under the hood? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Scrapy creates :class:`scrapy.Request ` objects -for each URL in the ``start_urls`` attribute of the Spider, and assigns -them the ``parse`` method of the spider as their callback function. - -These Requests are scheduled, then executed, and :class:`scrapy.http.Response` -objects are returned and then fed back to the spider, through the -:meth:`~scrapy.spider.Spider.parse` method. - -Extracting Items ----------------- - -Introduction to Selectors -^^^^^^^^^^^^^^^^^^^^^^^^^ - -There are several ways to extract data from web pages. Scrapy uses a mechanism -based on `XPath`_ or `CSS`_ expressions called :ref:`Scrapy Selectors -`. For more information about selectors and other extraction -mechanisms see the :ref:`Selectors documentation `. - -.. _XPath: http://www.w3.org/TR/xpath -.. _CSS: http://www.w3.org/TR/selectors - -Here are some examples of XPath expressions and their meanings: - -* ``/html/head/title``: selects the ```` element, inside the ``<head>`` - element of a HTML document - -* ``/html/head/title/text()``: selects the text inside the aforementioned - ``<title>`` element. - -* ``//td``: selects all the ``<td>`` elements - -* ``//div[@class="mine"]``: selects all ``div`` elements which contain an - attribute ``class="mine"`` - -These are just a couple of simple examples of what you can do with XPath, but -XPath expressions are indeed much more powerful. To learn more about XPath we -recommend `this XPath tutorial <http://www.w3schools.com/XPath/default.asp>`_. - -For working with XPaths, Scrapy provides :class:`~scrapy.selector.Selector` -class and convenient shortcuts to avoid instantiating selectors yourself -everytime you need to select something from a response. - -You can see selectors as objects that represent nodes in the document -structure. So, the first instantiated selectors are associated with the root -node, or the entire document. - -Selectors have four basic methods (click on the method to see the complete API -documentation): - -* :meth:`~scrapy.selector.Selector.xpath`: returns a list of selectors, each of - them representing the nodes selected by the xpath expression given as - argument. - -* :meth:`~scrapy.selector.Selector.css`: returns a list of selectors, each of - them representing the nodes selected by the CSS expression given as argument. - -* :meth:`~scrapy.selector.Selector.extract`: returns a unicode string with the - selected data. - -* :meth:`~scrapy.selector.Selector.re`: returns a list of unicode strings - extracted by applying the regular expression given as argument. +Scrapy schedules the :class:`scrapy.Request <scrapy.http.Request>` objects +returned by the ``start_requests`` method of the Spider. Upon receiving a +response for each one, it instantiates :class:`~scrapy.http.Response` objects +and calls the callback method associated with the request (in this case, the +``parse`` method) passing the response as argument. -Trying Selectors in the Shell -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A shortcut to the start_requests method +--------------------------------------- +Instead of implementing a :meth:`~scrapy.spiders.Spider.start_requests` method +that generates :class:`scrapy.Request <scrapy.http.Request>` objects from URLs, +you can just define a :attr:`~scrapy.spiders.Spider.start_urls` class attribute +with a list of URLs. 
This list will then be used by the default implementation +of :meth:`~scrapy.spiders.Spider.start_requests` to create the initial requests +for your spider:: -To illustrate the use of Selectors we're going to use the built-in :ref:`Scrapy -shell <topics-shell>`, which also requires IPython (an extended Python console) -installed on your system. + import scrapy -To start a shell, you must go to the project's top level directory and run:: - scrapy shell "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/" + class QuotesSpider(scrapy.Spider): + name = "quotes" + start_urls = [ + 'http://quotes.toscrape.com/page/1/', + 'http://quotes.toscrape.com/page/2/', + ] + + def parse(self, response): + page = response.url.split("/")[-2] + filename = 'quotes-%s.html' % page + with open(filename, 'wb') as f: + f.write(response.body) + +The :meth:`~scrapy.spiders.Spider.parse` method will be called to handle each +of the requests for those URLs, even though we haven't explicitly told Scrapy +to do so. This happens because :meth:`~scrapy.spiders.Spider.parse` is Scrapy's +default callback method, which is called for requests without an explicitly +assigned callback. + + +Extracting data +--------------- + +The best way to learn how to extract data with Scrapy is trying selectors +using the :ref:`Scrapy shell <topics-shell>`. Run:: + + scrapy shell 'http://quotes.toscrape.com/page/1/' .. note:: - Remember to always enclose urls with quotes when running Scrapy shell from - command-line, otherwise urls containing arguments (ie. ``&`` character) + Remember to always enclose urls in quotes when running Scrapy shell from + command-line, otherwise urls containing arguments (i.e. ``&`` character) will not work. -This is what the shell looks like:: + On Windows, use double quotes instead:: + + scrapy shell "http://quotes.toscrape.com/page/1/" + +You will see something like:: [ ... Scrapy log here ... ] - - 2014-01-23 17:11:42-0400 [default] DEBUG: Crawled (200) <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> (referer: None) + 2016-09-19 12:09:27 [scrapy.core.engine] DEBUG: Crawled (200) <GET http://quotes.toscrape.com/page/1/> (referer: None) [s] Available Scrapy objects: - [s] crawler <scrapy.crawler.Crawler object at 0x3636b50> + [s] scrapy scrapy module (contains scrapy.Request, scrapy.Selector, etc) + [s] crawler <scrapy.crawler.Crawler object at 0x7fa91d888c90> [s] item {} - [s] request <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> - [s] response <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/> - [s] settings <scrapy.settings.Settings object at 0x3fadc50> - [s] spider <Spider 'default' at 0x3cebf50> + [s] request <GET http://quotes.toscrape.com/page/1/> + [s] response <200 http://quotes.toscrape.com/page/1/> + [s] settings <scrapy.settings.Settings object at 0x7fa91d888c10> + [s] spider <DefaultSpider 'default' at 0x7fa91c8af990> [s] Useful shortcuts: [s] shelp() Shell help (print this help) [s] fetch(req_or_url) Fetch request (or URL) and update local objects [s] view(response) View response in a browser - In [1]: +Using the shell, you can try selecting elements using `CSS`_ with the response +object: -After the shell loads, you will have the response fetched in a local -``response`` variable, so if you type ``response.body`` you will see the body -of the response, or you can type ``response.headers`` to see its headers. +.. 
invisible-code-block: python -More important, if you type ``response.selector`` you will access a selector -object you can use to query the response, and convenient shortcuts like -``response.xpath()`` and ``response.css()`` mapping to -``response.selector.xpath()`` and ``response.selector.css()`` + response = load_response('http://quotes.toscrape.com/page/1/', 'quotes1.html') + +>>> response.css('title') +[<Selector xpath='descendant-or-self::title' data='<title>Quotes to Scrape'>] + +The result of running ``response.css('title')`` is a list-like object called +:class:`~scrapy.selector.SelectorList`, which represents a list of +:class:`~scrapy.selector.Selector` objects that wrap around XML/HTML elements +and allow you to run further queries to fine-grain the selection or extract the +data. + +To extract the text from the title above, you can do: + +>>> response.css('title::text').getall() +['Quotes to Scrape'] + +There are two things to note here: one is that we've added ``::text`` to the +CSS query, to mean we want to select only the text elements directly inside +```` element. If we don't specify ``::text``, we'd get the full title +element, including its tags: + +>>> response.css('title').getall() +['<title>Quotes to Scrape'] + +The other thing is that the result of calling ``.getall()`` is a list: it is +possible that a selector returns more than one result, so we extract them all. +When you know you just want the first result, as in this case, you can do: + +>>> response.css('title::text').get() +'Quotes to Scrape' + +As an alternative, you could've written: + +>>> response.css('title::text')[0].get() +'Quotes to Scrape' + +However, using ``.get()`` directly on a :class:`~scrapy.selector.SelectorList` +instance avoids an ``IndexError`` and returns ``None`` when it doesn't +find any element matching the selection. + +There's a lesson here: for most scraping code, you want it to be resilient to +errors due to things not being found on a page, so that even if some parts fail +to be scraped, you can at least get **some** data. + +Besides the :meth:`~scrapy.selector.SelectorList.getall` and +:meth:`~scrapy.selector.SelectorList.get` methods, you can also use +the :meth:`~scrapy.selector.SelectorList.re` method to extract using +:doc:`regular expressions `: + +>>> response.css('title::text').re(r'Quotes.*') +['Quotes to Scrape'] +>>> response.css('title::text').re(r'Q\w+') +['Quotes'] +>>> response.css('title::text').re(r'(\w+) to (\w+)') +['Quotes', 'Scrape'] + +In order to find the proper CSS selectors to use, you might find useful opening +the response page from the shell in your web browser using ``view(response)``. +You can use your browser's developer tools to inspect the HTML and come up +with a selector (see :ref:`topics-developer-tools`). + +`Selector Gadget`_ is also a nice tool to quickly find CSS selector for +visually selected elements, which works in many browsers. + +.. 
_Selector Gadget: https://selectorgadget.com/ -So let's try it:: +XPath: a brief intro +^^^^^^^^^^^^^^^^^^^^ - In [1]: response.xpath('//title') - Out[1]: [Open Directory - Computers: Progr'>] - - In [2]: response.xpath('//title').extract() - Out[2]: [u'Open Directory - Computers: Programming: Languages: Python: Books'] - - In [3]: response.xpath('//title/text()') - Out[3]: [] - - In [4]: response.xpath('//title/text()').extract() - Out[4]: [u'Open Directory - Computers: Programming: Languages: Python: Books'] - - In [5]: response.xpath('//title/text()').re('(\w+):') - Out[5]: [u'Computers', u'Programming', u'Languages', u'Python'] +Besides `CSS`_, Scrapy selectors also support using `XPath`_ expressions: -Extracting the data -^^^^^^^^^^^^^^^^^^^ +>>> response.xpath('//title') +[] +>>> response.xpath('//title/text()').get() +'Quotes to Scrape' -Now, let's try to extract some real information from those pages. +XPath expressions are very powerful, and are the foundation of Scrapy +Selectors. In fact, CSS selectors are converted to XPath under-the-hood. You +can see that if you read closely the text representation of the selector +objects in the shell. -You could type ``response.body`` in the console, and inspect the source code to -figure out the XPaths you need to use. However, inspecting the raw HTML code -there could become a very tedious task. To make this an easier task, you can -use some Firefox extensions like Firebug. For more information see -:ref:`topics-firebug` and :ref:`topics-firefox`. +While perhaps not as popular as CSS selectors, XPath expressions offer more +power because besides navigating the structure, it can also look at the +content. Using XPath, you're able to select things like: *select the link +that contains the text "Next Page"*. This makes XPath very fitting to the task +of scraping, and we encourage you to learn XPath even if you already know how to +construct CSS selectors, it will make scraping much easier. -After inspecting the page source, you'll find that the web sites information -is inside a ``
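To tie the CSS and XPath notes above together, here is a brief, illustrative shell sketch. It assumes the markup currently served by quotes.toscrape.com (quotes in ``div.quote`` elements with ``span.text`` and ``small.author`` children, and a pagination link inside ``li.next``); if the page layout changes, the selectors will need adjusting::

    # A hedged sketch, typed into the same "scrapy shell" session as above.
    # CSS: grab the text and author of every quote on the page.
    quote_texts = response.css('div.quote span.text::text').getall()
    authors = response.css('div.quote small.author::text').getall()

    # XPath: the use case mentioned above -- select the pagination link by
    # the text it contains, then take its href attribute.
    next_href = response.xpath('//li[@class="next"]/a/@href').get()
    # or, matching on the link text itself:
    next_href = response.xpath('//a[contains(text(), "Next")]/@href').get()

    # get() returns None when nothing matches, so this stays resilient on
    # the last page, where there is no "Next" link.

The same pagination href can also be taken with CSS via ``response.css('li.next a::attr(href)').get()``; XPath earns its keep once you need to match on the text of an element, as in the second expression above.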