
Merge remote-tracking branch 'upstream/master' into remove-six-code

Eugenio Lacuesta 2019-11-19 11:01:34 -03:00
commit 05785c1c17
51 changed files with 634 additions and 202 deletions

.bandit.yml Normal file

@ -0,0 +1,16 @@
skips:
- B101
- B105
- B303
- B306
- B307
- B311
- B320
- B321
- B402
- B404
- B406
- B410
- B503
- B603
- B605


@ -7,6 +7,8 @@ branches:
- /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
matrix:
include:
- env: TOXENV=security
python: 3.8
- env: TOXENV=flake8
python: 3.8
- env: TOXENV=pypy3


@ -68,7 +68,7 @@ members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at [http://contributor-covenant.org/version/1/4][version]
available at [http://contributor-covenant.org/version/1/4][version].
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/


@ -34,8 +34,8 @@ Scrapy is a fast high-level web crawling and web scraping framework, used to
crawl websites and extract structured data from their pages. It can be used for
a wide range of purposes, from data mining to monitoring and automated testing.
For more information including a list of features check the Scrapy homepage at:
https://scrapy.org
Check the Scrapy homepage at https://scrapy.org for more information,
including a list of features.
Requirements
============
@ -50,8 +50,8 @@ The quick way::
pip install scrapy
For more details see the install section in the documentation:
https://docs.scrapy.org/en/latest/intro/install.html
See the install section in the documentation at
https://docs.scrapy.org/en/latest/intro/install.html for more details.
Documentation
=============
@ -62,17 +62,17 @@ directory.
Releases
========
You can find release notes at https://docs.scrapy.org/en/latest/news.html
You can check https://docs.scrapy.org/en/latest/news.html for the release notes.
Community (blog, twitter, mail list, IRC)
=========================================
See https://scrapy.org/community/
See https://scrapy.org/community/ for details.
Contributing
============
See https://docs.scrapy.org/en/master/contributing.html
See https://docs.scrapy.org/en/master/contributing.html for details.
Code of Conduct
---------------
@ -86,9 +86,9 @@ Please report unacceptable behavior to opensource@scrapinghub.com.
Companies using Scrapy
======================
See https://scrapy.org/companies/
See https://scrapy.org/companies/ for a list.
Commercial Support
==================
See https://scrapy.org/support/
See https://scrapy.org/support/ for details.

docs/_tests/quotes1.html Normal file

@ -0,0 +1,281 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Quotes to Scrape</title>
<link rel="stylesheet" href="/static/bootstrap.min.css">
<link rel="stylesheet" href="/static/main.css">
</head>
<body>
<div class="container">
<div class="row header-box">
<div class="col-md-8">
<h1>
<a href="/" style="text-decoration: none">Quotes to Scrape</a>
</h1>
</div>
<div class="col-md-4">
<p>
<a href="/login">Login</a>
</p>
</div>
</div>
<div class="row">
<div class="col-md-8">
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”</span>
<span>by <small class="author" itemprop="author">Albert Einstein</small>
<a href="/author/Albert-Einstein">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="change,deep-thoughts,thinking,world" / >
<a class="tag" href="/tag/change/page/1/">change</a>
<a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>
<a class="tag" href="/tag/thinking/page/1/">thinking</a>
<a class="tag" href="/tag/world/page/1/">world</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“It is our choices, Harry, that show what we truly are, far more than our abilities.”</span>
<span>by <small class="author" itemprop="author">J.K. Rowling</small>
<a href="/author/J-K-Rowling">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="abilities,choices" / >
<a class="tag" href="/tag/abilities/page/1/">abilities</a>
<a class="tag" href="/tag/choices/page/1/">choices</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”</span>
<span>by <small class="author" itemprop="author">Albert Einstein</small>
<a href="/author/Albert-Einstein">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="inspirational,life,live,miracle,miracles" / >
<a class="tag" href="/tag/inspirational/page/1/">inspirational</a>
<a class="tag" href="/tag/life/page/1/">life</a>
<a class="tag" href="/tag/live/page/1/">live</a>
<a class="tag" href="/tag/miracle/page/1/">miracle</a>
<a class="tag" href="/tag/miracles/page/1/">miracles</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”</span>
<span>by <small class="author" itemprop="author">Jane Austen</small>
<a href="/author/Jane-Austen">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="aliteracy,books,classic,humor" / >
<a class="tag" href="/tag/aliteracy/page/1/">aliteracy</a>
<a class="tag" href="/tag/books/page/1/">books</a>
<a class="tag" href="/tag/classic/page/1/">classic</a>
<a class="tag" href="/tag/humor/page/1/">humor</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“Imperfection is beauty, madness is genius and it&#39;s better to be absolutely ridiculous than absolutely boring.”</span>
<span>by <small class="author" itemprop="author">Marilyn Monroe</small>
<a href="/author/Marilyn-Monroe">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="be-yourself,inspirational" / >
<a class="tag" href="/tag/be-yourself/page/1/">be-yourself</a>
<a class="tag" href="/tag/inspirational/page/1/">inspirational</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“Try not to become a man of success. Rather become a man of value.”</span>
<span>by <small class="author" itemprop="author">Albert Einstein</small>
<a href="/author/Albert-Einstein">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="adulthood,success,value" / >
<a class="tag" href="/tag/adulthood/page/1/">adulthood</a>
<a class="tag" href="/tag/success/page/1/">success</a>
<a class="tag" href="/tag/value/page/1/">value</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“It is better to be hated for what you are than to be loved for what you are not.”</span>
<span>by <small class="author" itemprop="author">André Gide</small>
<a href="/author/Andre-Gide">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="life,love" / >
<a class="tag" href="/tag/life/page/1/">life</a>
<a class="tag" href="/tag/love/page/1/">love</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“I have not failed. I&#39;ve just found 10,000 ways that won&#39;t work.”</span>
<span>by <small class="author" itemprop="author">Thomas A. Edison</small>
<a href="/author/Thomas-A-Edison">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="edison,failure,inspirational,paraphrased" / >
<a class="tag" href="/tag/edison/page/1/">edison</a>
<a class="tag" href="/tag/failure/page/1/">failure</a>
<a class="tag" href="/tag/inspirational/page/1/">inspirational</a>
<a class="tag" href="/tag/paraphrased/page/1/">paraphrased</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“A woman is like a tea bag; you never know how strong it is until it&#39;s in hot water.”</span>
<span>by <small class="author" itemprop="author">Eleanor Roosevelt</small>
<a href="/author/Eleanor-Roosevelt">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="misattributed-eleanor-roosevelt" / >
<a class="tag" href="/tag/misattributed-eleanor-roosevelt/page/1/">misattributed-eleanor-roosevelt</a>
</div>
</div>
<div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“A day without sunshine is like, you know, night.”</span>
<span>by <small class="author" itemprop="author">Steve Martin</small>
<a href="/author/Steve-Martin">(about)</a>
</span>
<div class="tags">
Tags:
<meta class="keywords" itemprop="keywords" content="humor,obvious,simile" / >
<a class="tag" href="/tag/humor/page/1/">humor</a>
<a class="tag" href="/tag/obvious/page/1/">obvious</a>
<a class="tag" href="/tag/simile/page/1/">simile</a>
</div>
</div>
<nav>
<ul class="pager">
<li class="next">
<a href="/page/2/">Next <span aria-hidden="true">&rarr;</span></a>
</li>
</ul>
</nav>
</div>
<div class="col-md-4 tags-box">
<h2>Top Ten tags</h2>
<span class="tag-item">
<a class="tag" style="font-size: 28px" href="/tag/love/">love</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 26px" href="/tag/inspirational/">inspirational</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 26px" href="/tag/life/">life</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 24px" href="/tag/humor/">humor</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 22px" href="/tag/books/">books</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 14px" href="/tag/reading/">reading</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 10px" href="/tag/friendship/">friendship</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 8px" href="/tag/friends/">friends</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 8px" href="/tag/truth/">truth</a>
</span>
<span class="tag-item">
<a class="tag" style="font-size: 6px" href="/tag/simile/">simile</a>
</span>
</div>
</div>
</div>
<footer class="footer">
<div class="container">
<p class="text-muted">
Quotes by: <a href="https://www.goodreads.com/quotes">GoodReads.com</a>
</p>
<p class="copyright">
Made with <span class='sh-red'></span> by <a href="https://scrapinghub.com">Scrapinghub</a>
</p>
</div>
</footer>
</body>
</html>


@ -27,10 +27,12 @@ sys.path.insert(0, path.dirname(path.dirname(__file__)))
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
'notfound.extension',
'scrapydocs',
'sphinx.ext.autodoc',
'sphinx.ext.coverage',
'sphinx.ext.intersphinx',
'sphinx.ext.viewcode',
]
# Add any paths that contain templates here, relative to this directory.
@ -237,7 +239,7 @@ coverage_ignore_pyobjects = [
r'\bContractsManager\b$',
# For default contracts we only want to document their general purpose in
# their constructor, the methods they reimplement to achieve that purpose
# their __init__ method, the methods they reimplement to achieve that purpose
# should be irrelevant to developers using those contracts.
r'\w+Contract\.(adjust_request_args|(pre|post)_process)$',
@ -273,4 +275,5 @@ coverage_ignore_pyobjects = [
intersphinx_mapping = {
'python': ('https://docs.python.org/3', None),
'sphinx': ('https://www.sphinx-doc.org/en/stable', None),
}

docs/conftest.py Normal file

@ -0,0 +1,29 @@
import os
from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
from scrapy.http.response.html import HtmlResponse
from sybil import Sybil
from sybil.parsers.codeblock import CodeBlockParser
from sybil.parsers.doctest import DocTestParser
from sybil.parsers.skip import skip
def load_response(url, filename):
input_path = os.path.join(os.path.dirname(__file__), '_tests', filename)
with open(input_path, 'rb') as input_file:
return HtmlResponse(url, body=input_file.read())
def setup(namespace):
namespace['load_response'] = load_response
pytest_collect_file = Sybil(
parsers=[
DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
CodeBlockParser(future_imports=['print_function']),
skip,
],
pattern='*.rst',
setup=setup,
).pytest()


@ -177,20 +177,19 @@ Documentation policies
======================
For reference documentation of API members (classes, methods, etc.) use
docstrings and make sure that the Sphinx documentation uses the autodoc_
extension to pull the docstrings. API reference documentation should follow
docstring conventions (`PEP 257`_) and be IDE-friendly: short, to the point,
and it may provide short examples.
docstrings and make sure that the Sphinx documentation uses the
:mod:`~sphinx.ext.autodoc` extension to pull the docstrings. API reference
documentation should follow docstring conventions (`PEP 257`_) and be
IDE-friendly: short, to the point, and it may provide short examples.
Other types of documentation, such as tutorials or topics, should be covered in
files within the ``docs/`` directory. This includes documentation that is
specific to an API member, but goes beyond API reference documentation.
In any case, if something is covered in a docstring, use the autodoc_
extension to pull the docstring into the documentation instead of duplicating
the docstring in files within the ``docs/`` directory.
.. _autodoc: http://www.sphinx-doc.org/en/stable/ext/autodoc.html
In any case, if something is covered in a docstring, use the
:mod:`~sphinx.ext.autodoc` extension to pull the docstring into the
documentation instead of duplicating the docstring in files within the
``docs/`` directory.
Tests
=====


@ -235,13 +235,16 @@ You will see something like::
[s] shelp() Shell help (print this help)
[s] fetch(req_or_url) Fetch request (or URL) and update local objects
[s] view(response) View response in a browser
>>>
Using the shell, you can try selecting elements using `CSS`_ with the response
object::
object:
>>> response.css('title')
[<Selector xpath='descendant-or-self::title' data='<title>Quotes to Scrape</title>'>]
.. invisible-code-block: python
response = load_response('http://quotes.toscrape.com/page/1/', 'quotes1.html')
>>> response.css('title')
[<Selector xpath='descendant-or-self::title' data='<title>Quotes to Scrape</title>'>]
The result of running ``response.css('title')`` is a list-like object called
:class:`~scrapy.selector.SelectorList`, which represents a list of
@ -372,6 +375,9 @@ we want::
We get a list of selectors for the quote HTML elements with::
>>> response.css("div.quote")
[<Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>,
<Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>,
...]
Each of the selectors returned by the query above allows us to run further
queries over their sub-elements. Let's assign the first selector to a
@ -396,6 +402,12 @@ to get all of them::
>>> tags
['change', 'deep-thoughts', 'thinking', 'world']
.. invisible-code-block: python
from sys import version_info
.. skip: next if(version_info < (3, 6), reason="Only Python 3.6+ dictionaries match the output")
Having figured out how to extract each bit, we can now iterate over all the
quotes elements and put them together into a Python dictionary::
@ -404,10 +416,9 @@ quotes elements and put them together into a Python dictionary::
... author = quote.css("small.author::text").get()
... tags = quote.css("div.tags a.tag::text").getall()
... print(dict(text=text, author=author, tags=tags))
{'tags': ['change', 'deep-thoughts', 'thinking', 'world'], 'author': 'Albert Einstein', 'text': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”'}
{'tags': ['abilities', 'choices'], 'author': 'J.K. Rowling', 'text': '“It is our choices, Harry, that show what we truly are, far more than our abilities.”'}
... a few more of these, omitted for brevity
>>>
{'text': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”', 'author': 'Albert Einstein', 'tags': ['change', 'deep-thoughts', 'thinking', 'world']}
{'text': '“It is our choices, Harry, that show what we truly are, far more than our abilities.”', 'author': 'J.K. Rowling', 'tags': ['abilities', 'choices']}
...
Extracting data in our spider
-----------------------------
@ -521,7 +532,7 @@ There is also an ``attrib`` property available
(see :ref:`selecting-attributes` for more)::
>>> response.css('li.next a').attrib['href']
'/page/2'
'/page/2/'
Let's see now our spider modified to recursively follow the link to the next
page, extracting data from it::


@ -308,12 +308,12 @@ New features
convenient way to build JSON requests (:issue:`3504`, :issue:`3505`)
* A ``process_request`` callback passed to the :class:`~scrapy.spiders.Rule`
constructor now receives the :class:`~scrapy.http.Response` object that
``__init__`` method now receives the :class:`~scrapy.http.Response` object that
originated the request as its second argument (:issue:`3682`)
* A new ``restrict_text`` parameter for the
:attr:`LinkExtractor <scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor>`
constructor allows filtering links by linking text (:issue:`3622`,
``__init__`` method allows filtering links by linking text (:issue:`3622`,
:issue:`3635`)
* A new :setting:`FEED_STORAGE_S3_ACL` setting allows defining a custom ACL
@ -479,7 +479,7 @@ The following deprecated APIs have been removed (:issue:`3578`):
* From :class:`~scrapy.selector.Selector`:
* ``_root`` (both the constructor argument and the object property, use
* ``_root`` (both the ``__init__`` method argument and the object property, use
``root``)
* ``extract_unquoted`` (use ``getall``)
@ -2703,7 +2703,7 @@ Scrapy changes:
- removed ``ENCODING_ALIASES`` setting, as encoding auto-detection has been moved to the `w3lib`_ library
- promoted :ref:`topics-djangoitem` to main contrib
- LogFormatter methods now return dicts (instead of strings) to support lazy formatting (:issue:`164`, :commit:`dcef7b0`)
- downloader handlers (:setting:`DOWNLOAD_HANDLERS` setting) now receive settings as the first argument of the constructor
- downloader handlers (:setting:`DOWNLOAD_HANDLERS` setting) now receive settings as the first argument of the ``__init__`` method
- replaced memory usage accounting with (more portable) `resource`_ module, removed ``scrapy.utils.memory`` module
- removed signal: ``scrapy.mail.mail_sent``
- removed ``TRACK_REFS`` setting, now :ref:`trackrefs <topics-leaks-trackrefs>` is always enabled
@ -2917,7 +2917,7 @@ API changes
- ``Request.copy()`` and ``Request.replace()`` now also copies their ``callback`` and ``errback`` attributes (#231)
- Removed ``UrlFilterMiddleware`` from ``scrapy.contrib`` (already disabled by default)
- Offsite middleware doesn't filter out any request coming from a spider that doesn't have an allowed_domains attribute (#225)
- Removed Spider Manager ``load()`` method. Now spiders are loaded in the constructor itself.
- Removed Spider Manager ``load()`` method. Now spiders are loaded in the ``__init__`` method itself.
- Changes to Scrapy Manager (now called "Crawler"):
- ``scrapy.core.manager.ScrapyManager`` class renamed to ``scrapy.crawler.Crawler``
- ``scrapy.core.manager.scrapymanager`` singleton moved to ``scrapy.project.crawler``


@ -1,2 +1,3 @@
Sphinx>=2.1
sphinx_rtd_theme
sphinx-notfound-page
sphinx_rtd_theme


@ -21,7 +21,7 @@ Quick example
=============
There are two ways to instantiate the mail sender. You can instantiate it using
the standard constructor::
the standard ``__init__`` method::
from scrapy.mail import MailSender
mailer = MailSender()
@ -111,7 +111,7 @@ uses `Twisted non-blocking IO`_, like the rest of the framework.
Mail settings
=============
These settings define the default constructor values of the :class:`MailSender`
These settings define the default ``__init__`` method values of the :class:`MailSender`
class, and can be used to configure e-mail notifications in your project without
writing any code (for those extensions and code that uses :class:`MailSender`).
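For illustration, assuming the standard mail settings names, a project might set
something like this in its ``settings.py`` (the values are placeholders)::

    MAIL_FROM = 'scrapy@localhost'   # sender address
    MAIL_HOST = 'localhost'          # SMTP host
    MAIL_PORT = 25                   # SMTP port
    MAIL_USER = None                 # set MAIL_USER/MAIL_PASS to enable SMTP auth
    MAIL_PASS = None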


@ -87,8 +87,8 @@ described next.
1. Declaring a serializer in the field
--------------------------------------
If you use :class:`~.Item` you can declare a serializer in the
:ref:`field metadata <topics-items-fields>`. The serializer must be
If you use :class:`~.Item` you can declare a serializer in the
:ref:`field metadata <topics-items-fields>`. The serializer must be
a callable which receives a value and returns its serialized form.
Example::
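    # Illustrative sketch only: ``Product`` and ``serialize_price`` are
    # placeholder names, not taken from this changeset.
    from scrapy.item import Item, Field

    def serialize_price(value):
        return '$ %s' % str(value)

    class Product(Item):
        name = Field()
        price = Field(serializer=serialize_price)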
@ -144,7 +144,7 @@ BaseItemExporter
defining what fields to export, whether to export empty fields, or which
encoding to use.
These features can be configured through the constructor arguments which
These features can be configured through the ``__init__`` method arguments which
populate their respective instance attributes: :attr:`fields_to_export`,
:attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent`.
@ -246,8 +246,8 @@ XmlItemExporter
:param item_element: The name of each item element in the exported XML.
:type item_element: str
The additional keyword arguments of this constructor are passed to the
:class:`BaseItemExporter` constructor.
The additional keyword arguments of this ``__init__`` method are passed to the
:class:`BaseItemExporter` ``__init__`` method.
A typical output of this exporter would be::
@ -306,9 +306,9 @@ CsvItemExporter
multi-valued fields, if found.
:type include_headers_line: str
The additional keyword arguments of this constructor are passed to the
:class:`BaseItemExporter` constructor, and the leftover arguments to the
`csv.writer`_ constructor, so you can use any ``csv.writer`` constructor
The additional keyword arguments of this ``__init__`` method are passed to the
:class:`BaseItemExporter` ``__init__`` method, and the leftover arguments to the
`csv.writer`_ ``__init__`` method, so you can use any ``csv.writer`` ``__init__`` method
argument to customize this exporter.
A typical output of this exporter would be::
@ -334,8 +334,8 @@ PickleItemExporter
For more information, refer to the `pickle module documentation`_.
The additional keyword arguments of this constructor are passed to the
:class:`BaseItemExporter` constructor.
The additional keyword arguments of this ``__init__`` method are passed to the
:class:`BaseItemExporter` ``__init__`` method.
Pickle isn't a human readable format, so no output examples are provided.
@ -351,8 +351,8 @@ PprintItemExporter
:param file: the file-like object to use for exporting the data. Its ``write`` method should
accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)
The additional keyword arguments of this constructor are passed to the
:class:`BaseItemExporter` constructor.
The additional keyword arguments of this ``__init__`` method are passed to the
:class:`BaseItemExporter` ``__init__`` method.
A typical output of this exporter would be::
@ -367,10 +367,10 @@ JsonItemExporter
.. class:: JsonItemExporter(file, \**kwargs)
Exports Items in JSON format to the specified file-like object, writing all
objects as a list of objects. The additional constructor arguments are
passed to the :class:`BaseItemExporter` constructor, and the leftover
arguments to the `JSONEncoder`_ constructor, so you can use any
`JSONEncoder`_ constructor argument to customize this exporter.
objects as a list of objects. The additional ``__init__`` method arguments are
passed to the :class:`BaseItemExporter` ``__init__`` method, and the leftover
arguments to the `JSONEncoder`_ ``__init__`` method, so you can use any
`JSONEncoder`_ ``__init__`` method argument to customize this exporter.
:param file: the file-like object to use for exporting the data. Its ``write`` method should
accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)
@ -398,10 +398,10 @@ JsonLinesItemExporter
.. class:: JsonLinesItemExporter(file, \**kwargs)
Exports Items in JSON format to the specified file-like object, writing one
JSON-encoded item per line. The additional constructor arguments are passed
to the :class:`BaseItemExporter` constructor, and the leftover arguments to
the `JSONEncoder`_ constructor, so you can use any `JSONEncoder`_
constructor argument to customize this exporter.
JSON-encoded item per line. The additional ``__init__`` method arguments are passed
to the :class:`BaseItemExporter` ``__init__`` method, and the leftover arguments to
the `JSONEncoder`_ ``__init__`` method, so you can use any `JSONEncoder`_
``__init__`` method argument to customize this exporter.
:param file: the file-like object to use for exporting the data. Its ``write`` method should
accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)


@ -28,7 +28,7 @@ Loading & activating extensions
Extensions are loaded and activated at startup by instantiating a single
instance of the extension class. Therefore, all the extension initialization
code must be performed in the class constructor (``__init__`` method).
code must be performed in the class ``__init__`` method.
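As a rough sketch, an extension that does all of its setup at instantiation time
might look like this (the class name and the ``MYEXT_ENABLED`` setting are
placeholders)::

    from scrapy import signals
    from scrapy.exceptions import NotConfigured

    class SpiderOpenedLogger:

        def __init__(self, stats):
            # everything the extension needs is wired up here
            self.stats = stats

        @classmethod
        def from_crawler(cls, crawler):
            if not crawler.settings.getbool('MYEXT_ENABLED'):
                raise NotConfigured
            ext = cls(crawler.stats)
            crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
            return ext

        def spider_opened(self, spider):
            spider.logger.info('opened spider %s', spider.name)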
To make an extension available, add it to the :setting:`EXTENSIONS` setting in
your Scrapy settings. In :setting:`EXTENSIONS`, each extension is represented


@ -16,12 +16,12 @@ especially in a larger project with many spiders.
To define common output data format Scrapy provides the :class:`Item` class.
:class:`Item` objects are simple containers used to collect the scraped data.
They provide a `dictionary-like`_ API with a convenient syntax for declaring
their available fields.
their available fields.
Various Scrapy components use extra information provided by Items:
Various Scrapy components use extra information provided by Items:
exporters look at declared fields to figure out columns to export,
serialization can be customized using Item fields metadata, :mod:`trackref`
tracks Item instances to help find memory leaks
tracks Item instances to help find memory leaks
(see :ref:`topics-leaks-trackrefs`), etc.
.. _dictionary-like: https://docs.python.org/2/library/stdtypes.html#dict
@ -237,7 +237,7 @@ Item objects
Return a new Item optionally initialized from the given argument.
Items replicate the standard `dict API`_, including its constructor, and
Items replicate the standard `dict API`_, including its ``__init__`` method, and
also provide the following additional API members:
.. automethod:: copy


@ -71,34 +71,11 @@ on cookies.
Request serialization
---------------------
Requests must be serializable by the ``pickle`` module, in order for persistence
to work, so you should make sure that your requests are serializable.
The most common issue here is to use ``lambda`` functions on request callbacks that
can't be persisted.
So, for example, this won't work::
def some_callback(self, response):
somearg = 'test'
return scrapy.Request('http://www.example.com',
callback=lambda r: self.other_callback(r, somearg))
def other_callback(self, response, somearg):
print("the argument passed is: %s" % somearg)
But this will::
def some_callback(self, response):
somearg = 'test'
return scrapy.Request('http://www.example.com',
callback=self.other_callback, cb_kwargs={'somearg': somearg})
def other_callback(self, response, somearg):
print("the argument passed is: %s" % somearg)
For persistence to work, :class:`~scrapy.http.Request` objects must be
serializable with :mod:`pickle`, except for the ``callback`` and ``errback``
values passed to their ``__init__`` method, which must be methods of the
running :class:`~scrapy.spiders.Spider` class.
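For example, a request whose callback is a bound spider method serializes fine,
with any extra data passed through ``cb_kwargs`` instead of being captured in a
lambda (which cannot be pickled)::

    # inside a Spider subclass; assumes ``import scrapy``
    def some_callback(self, response):
        somearg = 'test'
        return scrapy.Request('http://www.example.com',
                              callback=self.other_callback,
                              cb_kwargs={'somearg': somearg})

    def other_callback(self, response, somearg):
        self.logger.info('the argument passed is: %s', somearg)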
If you wish to log the requests that couldn't be serialized, you can set the
:setting:`SCHEDULER_DEBUG` setting to ``True`` in the project's settings page.
It is ``False`` by default.
.. _pickle: https://docs.python.org/library/pickle.html


@ -26,7 +26,7 @@ Using Item Loaders to populate items
To use an Item Loader, you must first instantiate it. You can either
instantiate it with a dict-like object (e.g. Item or dict) or without one, in
which case an Item is automatically instantiated in the Item Loader constructor
which case an Item is automatically instantiated in the Item Loader ``__init__`` method
using the Item class specified in the :attr:`ItemLoader.default_item_class`
attribute.
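For instance (``Product`` standing in for any Item class, ``response`` for a
downloaded response)::

    from scrapy.loader import ItemLoader

    loader = ItemLoader(item=Product(), response=response)  # explicit item
    loader = ItemLoader(response=response)  # built from default_item_class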
@ -271,7 +271,7 @@ There are several ways to modify Item Loader context values:
loader.context['unit'] = 'cm'
2. On Item Loader instantiation (the keyword arguments of Item Loader
constructor are stored in the Item Loader context)::
``__init__`` method are stored in the Item Loader context)::
loader = ItemLoader(product, unit='cm')
@ -500,7 +500,7 @@ ItemLoader objects
.. attribute:: default_item_class
An Item class (or factory), used to instantiate items when not given in
the constructor.
the ``__init__`` method.
.. attribute:: default_input_processor
@ -515,15 +515,15 @@ ItemLoader objects
.. attribute:: default_selector_class
The class used to construct the :attr:`selector` of this
:class:`ItemLoader`, if only a response is given in the constructor.
If a selector is given in the constructor this attribute is ignored.
:class:`ItemLoader`, if only a response is given in the ``__init__`` method.
If a selector is given in the ``__init__`` method this attribute is ignored.
This attribute is sometimes overridden in subclasses.
.. attribute:: selector
The :class:`~scrapy.selector.Selector` object to extract data from.
It's either the selector given in the constructor or one created from
the response given in the constructor using the
It's either the selector given in the ``__init__`` method or one created from
the response given in the ``__init__`` method using the
:attr:`default_selector_class`. This attribute is meant to be
read-only.
@ -648,7 +648,7 @@ Here is a list of all built-in processors:
.. class:: Identity
The simplest processor, which doesn't do anything. It returns the original
values unchanged. It doesn't receive any constructor arguments, nor does it
values unchanged. It doesn't receive any ``__init__`` method arguments, nor does it
accept Loader contexts.
Example::
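    >>> from scrapy.loader.processors import Identity
    >>> proc = Identity()   # no arguments, no Loader context
    >>> proc(['one', 'two', 'three'])
    ['one', 'two', 'three']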
@ -662,7 +662,7 @@ Here is a list of all built-in processors:
Returns the first non-null/non-empty value from the values received,
so it's typically used as an output processor to single-valued fields.
It doesn't receive any constructor arguments, nor does it accept Loader contexts.
It doesn't receive any ``__init__`` method arguments, nor does it accept Loader contexts.
Example::
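    >>> from scrapy.loader.processors import TakeFirst
    >>> proc = TakeFirst()
    >>> proc(['', 'one', 'two', 'three'])   # empty values are skipped
    'one'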
@ -673,7 +673,7 @@ Here is a list of all built-in processors:
.. class:: Join(separator=u' ')
Returns the values joined with the separator given in the constructor, which
Returns the values joined with the separator given in the ``__init__`` method, which
defaults to ``u' '``. It doesn't accept Loader contexts.
When using the default separator, this processor is equivalent to the
@ -711,7 +711,7 @@ Here is a list of all built-in processors:
those which do, this processor will pass the currently active :ref:`Loader
context <topics-loaders-context>` through that parameter.
The keyword arguments passed in the constructor are used as the default
The keyword arguments passed in the ``__init__`` method are used as the default
Loader context values passed to each function call. However, the final
Loader context values passed to functions are overridden with the currently
active Loader context accessible through the :meth:`ItemLoader.context`
@ -755,12 +755,12 @@ Here is a list of all built-in processors:
['HELLO', 'THIS', 'IS', 'SCRAPY']
As with the Compose processor, functions can receive Loader contexts, and
constructor keyword arguments are used as default context values. See
``__init__`` method keyword arguments are used as default context values. See
:class:`Compose` processor for more info.
.. class:: SelectJmes(json_path)
Queries the value using the json path provided to the constructor and returns the output.
Queries the value using the json path provided to the ``__init__`` method and returns the output.
Requires jmespath (https://github.com/jmespath/jmespath.py) to run.
This processor takes only one input at a time.
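For instance, querying a single key (``foo`` is just a placeholder here)::

    >>> from scrapy.loader.processors import SelectJmes
    >>> proc = SelectJmes('foo')
    >>> proc({'foo': 'bar'})
    'bar'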


@ -255,18 +255,18 @@ scrapy.utils.log module
when running custom scripts using :class:`~scrapy.crawler.CrawlerRunner`.
In that case, its usage is not required but it's recommended.
If you plan on configuring the handlers yourself, it is still recommended that you
call this function, passing ``install_root_handler=False``. Bear in mind
there won't be any log output set by default in that case.
Another option when running custom scripts is to manually configure the logging.
To do this you can use `logging.basicConfig()`_ to set a basic root handler.
To get you started on manually configuring logging's output, you can use
`logging.basicConfig()`_ to set a basic root handler. This is an example
on how to redirect ``INFO`` or higher messages to a file::
Note that :class:`~scrapy.crawler.CrawlerProcess` automatically calls ``configure_logging``,
so it is recommended to only use `logging.basicConfig()`_ together with
:class:`~scrapy.crawler.CrawlerRunner`.
This is an example on how to redirect ``INFO`` or higher messages to a file::
import logging
from scrapy.utils.log import configure_logging
configure_logging(install_root_handler=False)
logging.basicConfig(
filename='log.txt',
format='%(levelname)s: %(message)s',


@ -137,7 +137,7 @@ Request objects
A string containing the URL of this request. Keep in mind that this
attribute contains the escaped URL, so it can differ from the URL passed in
the constructor.
the ``__init__`` method.
This attribute is read-only. To change the URL of a Request use
:meth:`replace`.
@ -400,7 +400,7 @@ fields with form data from :class:`Response` objects.
.. class:: FormRequest(url, [formdata, ...])
The :class:`FormRequest` class adds a new keyword parameter to the constructor. The
The :class:`FormRequest` class adds a new keyword parameter to the ``__init__`` method. The
remaining arguments are the same as for the :class:`Request` class and are
not documented here.
@ -473,7 +473,7 @@ fields with form data from :class:`Response` objects.
:type dont_click: boolean
The other parameters of this class method are passed directly to the
:class:`FormRequest` constructor.
:class:`FormRequest` ``__init__`` method.
.. versionadded:: 0.10.3
The ``formname`` parameter.
@ -547,7 +547,7 @@ dealing with JSON requests.
.. class:: JsonRequest(url, [... data, dumps_kwargs])
The :class:`JsonRequest` class adds two new keyword parameters to the constructor. The
The :class:`JsonRequest` class adds two new keyword parameters to the ``__init__`` method. The
remaining arguments are the same as for the :class:`Request` class and are
not documented here.
@ -556,7 +556,7 @@ dealing with JSON requests.
:param data: is any JSON serializable object that needs to be JSON encoded and assigned to body.
if :attr:`Request.body` argument is provided this parameter will be ignored.
if :attr:`Request.body` argument is not provided and data argument is provided :attr:`Request.method` will be
if :attr:`Request.body` argument is not provided and data argument is provided :attr:`Request.method` will be
set to ``'POST'`` automatically.
:type data: JSON serializable object
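A quick usage sketch (URL and payload are placeholders)::

    from scrapy.http import JsonRequest

    # ``data`` is JSON-encoded into the body; the method defaults to POST.
    req = JsonRequest(url='http://www.example.com/post/action',
                      data={'name1': 'value1', 'name2': 'value2'})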
@ -721,7 +721,7 @@ TextResponse objects
:class:`Response` class, which is meant to be used only for binary data,
such as images, sounds or any media file.
:class:`TextResponse` objects support a new constructor argument, in
:class:`TextResponse` objects support a new ``__init__`` method argument, in
addition to the base :class:`Response` objects. The remaining functionality
is the same as for the :class:`Response` class and is not documented here.
@ -755,7 +755,7 @@ TextResponse objects
A string with the encoding of this response. The encoding is resolved by
trying the following mechanisms, in order:
1. the encoding passed in the constructor ``encoding`` argument
1. the encoding passed in the ``__init__`` method ``encoding`` argument
2. the encoding declared in the Content-Type HTTP header. If this
encoding is not valid (ie. unknown), it is ignored and the next


@ -2,7 +2,24 @@
usefixtures = chdir
python_files=test_*.py __init__.py
python_classes=
addopts = --doctest-modules --assert=plain
addopts =
--assert=plain
--doctest-modules
--ignore=docs/_ext
--ignore=docs/conf.py
--ignore=docs/news.rst
--ignore=docs/topics/commands.rst
--ignore=docs/topics/debug.rst
--ignore=docs/topics/developer-tools.rst
--ignore=docs/topics/dynamic-content.rst
--ignore=docs/topics/items.rst
--ignore=docs/topics/leaks.rst
--ignore=docs/topics/loaders.rst
--ignore=docs/topics/selectors.rst
--ignore=docs/topics/shell.rst
--ignore=docs/topics/stats.rst
--ignore=docs/topics/telnetconsole.rst
--ignore=docs/utils
twisted = 1
flake8-ignore =
# extras
@ -30,7 +47,7 @@ flake8-ignore =
scrapy/core/engine.py E261 E501 E128 E127 E306 E502
scrapy/core/scheduler.py E501
scrapy/core/scraper.py E501 E306 E261 E128 W504
scrapy/core/spidermw.py E501 E731 E502 E231 E126 E226
scrapy/core/spidermw.py E501 E731 E502 E126 E226
scrapy/core/downloader/__init__.py F401 E501
scrapy/core/downloader/contextfactory.py E501 E128 E126
scrapy/core/downloader/middleware.py E501 E502
@ -175,14 +192,14 @@ flake8-ignore =
tests/test_crawl.py E501 E741 E265
tests/test_crawler.py F841 E306 E501
tests/test_dependencies.py E302 F841 E501 E305
tests/test_downloader_handlers.py E124 E127 E128 E225 E261 E265 F401 E501 E502 E701 E711 E126 E226 E123
tests/test_downloader_handlers.py E124 E127 E128 E225 E261 E265 F401 E501 E502 E701 E126 E226 E123
tests/test_downloadermiddleware.py E501
tests/test_downloadermiddleware_ajaxcrawlable.py E302 E501
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E303 E265 E126
tests/test_downloadermiddleware_decompression.py E127
tests/test_downloadermiddleware_defaultheaders.py E501
tests/test_downloadermiddleware_downloadtimeout.py E501
tests/test_downloadermiddleware_httpcache.py E713 E501 E302 E305 F401
tests/test_downloadermiddleware_httpcache.py E501 E302 E305 F401
tests/test_downloadermiddleware_httpcompression.py E501 F401 E251 E126 E123
tests/test_downloadermiddleware_httpproxy.py F401 E501 E128
tests/test_downloadermiddleware_redirect.py E501 E303 E128 E306 E127 E305
@ -196,13 +213,13 @@ flake8-ignore =
tests/test_feedexport.py E501 F401 F841 E241
tests/test_http_cookies.py E501
tests/test_http_headers.py E302 E501
tests/test_http_request.py F401 E402 E501 E231 E261 E127 E128 W293 E502 E128 E502 E126 E123
tests/test_http_request.py F401 E402 E501 E261 E127 E128 W293 E502 E128 E502 E126 E123
tests/test_http_response.py E501 E301 E502 E128 E265
tests/test_item.py E701 E128 E231 F841 E306
tests/test_item.py E701 E128 F841 E306
tests/test_link.py E501
tests/test_linkextractors.py E501 E128 E231 E124
tests/test_linkextractors.py E501 E128 E124
tests/test_loader.py E302 E501 E731 E303 E741 E128 E117 E241
tests/test_logformatter.py E128 E501 E231 E122 E302
tests/test_logformatter.py E128 E501 E122 E302
tests/test_mail.py E302 E128 E501 E305
tests/test_middleware.py E302 E501 E128
tests/test_pipeline_crawl.py E131 E501 E128 E126
@ -221,8 +238,8 @@ flake8-ignore =
tests/test_spidermiddleware_output_chain.py F401 E501 E302 W293 E226
tests/test_spidermiddleware_referer.py F401 E501 E302 F841 E125 E201 E261 E124 E501 E241 E121
tests/test_squeues.py E501 E302 E701 E741
tests/test_utils_conf.py E501 E231 E303 E128
tests/test_utils_console.py E302 E231
tests/test_utils_conf.py E501 E303 E128
tests/test_utils_console.py E302
tests/test_utils_curl.py E501
tests/test_utils_datatypes.py E402 E501 E305
tests/test_utils_defer.py E306 E261 E501 E302 F841 E226
@ -251,4 +268,4 @@ flake8-ignore =
tests/test_spiderloader/test_spiders/spider2.py E302
tests/test_spiderloader/test_spiders/spider3.py E302
tests/test_spiderloader/test_spiders/nested/spider4.py E302
tests/test_utils_misc/__init__.py E501 E231
tests/test_utils_misc/__init__.py E501


@ -231,9 +231,9 @@ class Scraper(object):
signal=signals.item_dropped, item=item, response=response,
spider=spider, exception=output.value)
else:
logger.error('Error processing %(item)s', {'item': item},
exc_info=failure_to_exc_info(output),
extra={'spider': spider})
logkws = self.logformatter.error(item, ex, response, spider)
logger.log(*logformatter_adapter(logkws), extra={'spider': spider},
exc_info=failure_to_exc_info(output))
return self.signals.send_catch_log_deferred(
signal=signals.item_error, item=item, response=response,
spider=spider, failure=output)


@ -35,7 +35,7 @@ class SpiderMiddlewareManager(MiddlewareManager):
self.methods['process_spider_exception'].appendleft(getattr(mw, 'process_spider_exception', None))
def scrape_response(self, scrape_func, response, request, spider):
fname = lambda f:'%s.%s' % (
fname = lambda f: '%s.%s' % (
f.__self__.__class__.__name__,
f.__func__.__name__)


@ -4,9 +4,9 @@ and extract the potentially compressed responses that may arrive.
import bz2
import gzip
import zipfile
import tarfile
import logging
import tarfile
import zipfile
from io import BytesIO
from tempfile import mktemp


@ -29,7 +29,7 @@ class BaseItemExporter(object):
def _configure(self, options, dont_fail=False):
"""Configure the exporter by poping options from the ``options`` dict.
If dont_fail is set, it won't raise an exception on unexpected options
(useful for using with keyword arguments in subclasses constructors)
(useful for using with keyword arguments in subclasses ``__init__`` methods)
"""
self.encoding = options.pop('encoding', None)
self.fields_to_export = options.pop('fields_to_export', None)


@ -198,9 +198,9 @@ class FeedExporter(object):
def __init__(self, settings):
self.settings = settings
self.urifmt = settings['FEED_URI']
if not self.urifmt:
if not settings['FEED_URI']:
raise NotConfigured
self.urifmt = str(settings['FEED_URI'])
self.format = settings['FEED_FORMAT'].lower()
self.export_encoding = settings['FEED_EXPORT_ENCODING']
self.storages = self._load_components('FEED_STORAGES')


@ -65,7 +65,7 @@ class Request(object_ref):
s = safe_url_string(url, self.encoding)
self._url = escape_ajax(s)
if ':' not in self._url:
if ('://' not in self._url) and (not self._url.startswith('data:')):
raise ValueError('Missing scheme in request url: %s' % self._url)
url = property(_get_url, obsolete_setter(_set_url, 'url'))


@ -19,23 +19,26 @@ from scrapy.utils.url import (
# common file extensions that are not followed if they occur in links
IGNORED_EXTENSIONS = [
# archives
'7z', '7zip', 'bz2', 'rar', 'tar', 'tar.gz', 'xz', 'zip',
# images
'mng', 'pct', 'bmp', 'gif', 'jpg', 'jpeg', 'png', 'pst', 'psp', 'tif',
'tiff', 'ai', 'drw', 'dxf', 'eps', 'ps', 'svg',
'tiff', 'ai', 'drw', 'dxf', 'eps', 'ps', 'svg', 'cdr', 'ico',
# audio
'mp3', 'wma', 'ogg', 'wav', 'ra', 'aac', 'mid', 'au', 'aiff',
# video
'3gp', 'asf', 'asx', 'avi', 'mov', 'mp4', 'mpg', 'qt', 'rm', 'swf', 'wmv',
'm4a', 'm4v', 'flv',
'm4a', 'm4v', 'flv', 'webm',
# office suites
'xls', 'xlsx', 'ppt', 'pptx', 'pps', 'doc', 'docx', 'odt', 'ods', 'odg',
'odp',
# other
'css', 'pdf', 'exe', 'bin', 'rss', 'zip', 'rar',
'css', 'pdf', 'exe', 'bin', 'rss', 'dmg', 'iso', 'apk'
]


@ -8,6 +8,7 @@ from scrapy.utils.request import referer_str
SCRAPEDMSG = u"Scraped from %(src)s" + os.linesep + "%(item)s"
DROPPEDMSG = u"Dropped: %(exception)s" + os.linesep + "%(item)s"
CRAWLEDMSG = u"Crawled (%(status)s) %(request)s%(request_flags)s (referer: %(referer)s)%(response_flags)s"
ERRORMSG = u"'Error processing %(item)s'"
class LogFormatter(object):
@ -92,6 +93,16 @@ class LogFormatter(object):
}
}
def error(self, item, exception, response, spider):
"""Logs a message when an item causes an error while it is passing through the item pipeline."""
return {
'level': logging.ERROR,
'msg': ERRORMSG,
'args': {
'item': item,
}
}
@classmethod
def from_crawler(cls, crawler):
return cls()


@ -86,9 +86,6 @@ class _SlotPriorityQueues(object):
def __len__(self):
return sum(len(x) for x in self.pqueues.values()) if self.pqueues else 0
def __contains__(self, slot):
return slot in self.pqueues
class ScrapyPriorityQueue(PriorityQueue):
"""


@ -5,9 +5,10 @@ Python Standard Library.
This module must not depend on any module outside the Standard Library.
"""
import copy
import collections
import copy
import warnings
from collections.abc import Mapping
from scrapy.exceptions import ScrapyDeprecationWarning
@ -223,7 +224,7 @@ class CaselessDict(dict):
return dict.setdefault(self, self.normkey(key), self.normvalue(def_val))
def update(self, seq):
seq = seq.items() if isinstance(seq, collections.abc.Mapping) else seq
seq = seq.items() if isinstance(seq, Mapping) else seq
iseq = ((self.normkey(k), self.normvalue(v)) for k, v in seq)
super(CaselessDict, self).update(iseq)
@ -247,8 +248,9 @@ class LocalCache(collections.OrderedDict):
self.limit = limit
def __setitem__(self, key, value):
while len(self) >= self.limit:
self.popitem(last=False)
if self.limit:
while len(self) >= self.limit:
self.popitem(last=False)
super(LocalCache, self).__setitem__(key, value)


@ -296,7 +296,7 @@ class WeakKeyCache(object):
def stringify_dict(dct_or_tuples, encoding='utf-8', keys_only=True):
"""Return a (new) dict with unicode keys (and values when "keys_only" is
False) of the given dict converted to strings. ``dct_or_tuples`` can be a
dict or a list of tuples, like any dict constructor supports.
dict or a list of tuples, like any dict ``__init__`` method supports.
"""
d = {}
for k, v in dict(dct_or_tuples).items():


@ -3,10 +3,10 @@ from twisted.internet import reactor, error
def listen_tcp(portrange, host, factory):
"""Like reactor.listenTCP but tries different ports in a range."""
assert len(portrange) <= 2, "invalid portrange: %s" % portrange
if not hasattr(portrange, '__iter__'):
return reactor.listenTCP(portrange, factory, interface=host)
if not portrange:
return reactor.listenTCP(0, factory, interface=host)
if not hasattr(portrange, '__iter__'):
return reactor.listenTCP(portrange, factory, interface=host)
if len(portrange) == 1:
return reactor.listenTCP(portrange[0], factory, interface=host)
for x in range(portrange[0], portrange[1]+1):


@ -38,7 +38,7 @@ singletons members of that object, as explained below:
``scrapy.core.manager.ExecutionManager``) - instantiated with a ``Settings``
object
- **crawler.settings**: ``scrapy.conf.Settings`` instance (passed in the constructor)
- **crawler.settings**: ``scrapy.conf.Settings`` instance (passed in the ``__init__`` method)
- **crawler.extensions**: ``scrapy.extension.ExtensionManager`` instance
- **crawler.engine**: ``scrapy.core.engine.ExecutionEngine`` instance
- ``crawler.engine.scheduler``
@ -55,7 +55,7 @@ singletons members of that object, as explained below:
``STATS_CLASS`` setting)
- **crawler.log**: Logger class with methods replacing the current
``scrapy.log`` functions. Logging would be started (if enabled) on
``Crawler`` constructor, so no log starting functions are required.
``Crawler`` instantiation, so no log starting functions are required.
- ``crawler.log.msg``
- **crawler.signals**: signal handling
@ -69,12 +69,12 @@ Required code changes after singletons removal
==============================================
All components (extensions, middlewares, etc) will receive this ``Crawler``
object in their constructors, and this will be the only mechanism for accessing
object in their ``__init__`` methods, and this will be the only mechanism for accessing
any other components (as opposed to importing each singleton from their
respective module). This will also serve to stabilize the core API, something
which we haven't documented so far (partly because of this).
So, for a typical middleware constructor code, instead of this:
So, for a typical middleware ``__init__`` method code, instead of this:
::
@ -125,13 +125,13 @@ Open issues to resolve
- Should we pass ``Settings`` object to ``ScrapyCommand.add_options()``?
- How should spiders access settings?
- Option 1. Pass ``Crawler`` object to spider constructors too
- Option 1. Pass ``Crawler`` object to spider ``__init__`` methods too
- pro: one way to access all components (settings and signals being the
most relevant to spiders)
- con?: spider code can access (and control) any crawler component -
since we don't want to support spiders messing with the crawler (write
an extension or spider middleware if you need that)
- Option 2. Pass ``Settings`` object to spider constructors, which would
- Option 2. Pass ``Settings`` object to spider ``__init__`` methods, which would
then be accessed through ``self.settings``, like logging which is accessed
through ``self.log``


@ -6,6 +6,7 @@ pytest
pytest-cov
pytest-twisted
pytest-xdist
sybil
testfixtures
# optional for shell wrapper tests


@ -614,7 +614,7 @@ class Http11MockServerTestCase(unittest.TestCase):
crawler = get_crawler(SingleRequestSpider)
yield crawler.crawl(seed=Request(url=self.mockserver.url('')))
failure = crawler.spider.meta.get('failure')
self.assertTrue(failure == None)
self.assertTrue(failure is None)
reason = crawler.spider.meta['close_reason']
self.assertTrue(reason, 'finished')
@ -636,7 +636,7 @@ class Http11MockServerTestCase(unittest.TestCase):
yield crawler.crawl(seed=request)
# download_maxsize = 50 is enough for the gzipped response
failure = crawler.spider.meta.get('failure')
self.assertTrue(failure == None)
self.assertTrue(failure is None)
reason = crawler.spider.meta['close_reason']
self.assertTrue(reason, 'finished')


@ -84,8 +84,8 @@ class _BaseTest(unittest.TestCase):
def assertEqualRequestButWithCacheValidators(self, request1, request2):
self.assertEqual(request1.url, request2.url)
assert not b'If-None-Match' in request1.headers
assert not b'If-Modified-Since' in request1.headers
assert b'If-None-Match' not in request1.headers
assert b'If-Modified-Since' not in request1.headers
assert any(h in request2.headers for h in (b'If-None-Match', b'If-Modified-Since'))
self.assertEqual(request1.body, request2.body)


@ -6,6 +6,7 @@ import tempfile
import shutil
import string
from io import BytesIO
from pathlib import Path
from unittest import mock
from urllib.parse import urljoin, urlparse, quote
from urllib.request import pathname2url
@ -403,6 +404,7 @@ class FeedExportTest(unittest.TestCase):
defaults = {
'FEED_URI': res_uri,
'FEED_FORMAT': 'csv',
'FEED_PATH': res_path
}
defaults.update(settings or {})
try:
@ -411,7 +413,7 @@ class FeedExportTest(unittest.TestCase):
spider_cls.start_urls = [s.url('/')]
yield runner.crawl(spider_cls)
with open(res_path, 'rb') as f:
with open(str(defaults['FEED_PATH']), 'rb') as f:
content = f.read()
finally:
@ -841,3 +843,17 @@ class FeedExportTest(unittest.TestCase):
yield self.exported_data({}, settings)
self.assertTrue(FromCrawlerCsvItemExporter.init_with_crawler)
self.assertTrue(FromCrawlerFileFeedStorage.init_with_crawler)
@defer.inlineCallbacks
def test_pathlib_uri(self):
tmpdir = tempfile.mkdtemp()
feed_uri = Path(tmpdir) / 'res'
settings = {
'FEED_FORMAT': 'csv',
'FEED_STORE_EMPTY': True,
'FEED_URI': feed_uri,
'FEED_PATH': feed_uri
}
data = yield self.exported_no_data(settings)
self.assertEqual(data, b'')
shutil.rmtree(tmpdir, ignore_errors=True)


@ -3,7 +3,7 @@ import cgi
import unittest
import re
import json
import xmlrpc.client as xmlrpclib
import xmlrpc.client
import warnings
from unittest import mock
from urllib.parse import parse_qs, unquote_to_bytes, urlparse
@ -20,7 +20,7 @@ class RequestTest(unittest.TestCase):
default_meta = {}
def test_init(self):
# Request requires url in the constructor
# Request requires url in the __init__ method
self.assertRaises(Exception, self.request_class)
# url argument must be basestring
@ -47,11 +47,13 @@ class RequestTest(unittest.TestCase):
def test_url_no_scheme(self):
self.assertRaises(ValueError, self.request_class, 'foo')
self.assertRaises(ValueError, self.request_class, '/foo/')
self.assertRaises(ValueError, self.request_class, '/foo:bar')
def test_headers(self):
# Different ways of setting headers attribute
url = 'http://www.scrapy.org'
headers = {b'Accept':'gzip', b'Custom-Header':'nothing to tell you'}
headers = {b'Accept': 'gzip', b'Custom-Header': 'nothing to tell you'}
r = self.request_class(url=url, headers=headers)
p = self.request_class(url=url, headers=r.headers)
@ -495,7 +497,7 @@ class FormRequestTest(RequestTest):
formdata=(('foo', 'bar'), ('foo', 'baz')))
self.assertEqual(urlparse(req.url).hostname, 'www.example.com')
self.assertEqual(urlparse(req.url).query, 'foo=bar&foo=baz')
def test_from_response_override_duplicate_form_key(self):
response = _buildresponse(
"""<form action="get.php" method="POST">
@ -652,7 +654,7 @@ class FormRequestTest(RequestTest):
req = self.request_class.from_response(response, dont_click=True)
fs = _qs(req)
self.assertEqual(fs, {b'i1': [b'i1v'], b'i2': [b'i2v']})
def test_from_response_clickdata_does_not_ignore_image(self):
response = _buildresponse(
"""<form>
@ -811,7 +813,7 @@ class FormRequestTest(RequestTest):
<input type="hidden" name="one" value="1">
<input type="hidden" name="two" value="2">
</form>""")
r1 = self.request_class.from_response(response, formdata={'two':'3'})
r1 = self.request_class.from_response(response, formdata={'two': '3'})
self.assertEqual(r1.method, 'POST')
self.assertEqual(r1.headers['Content-type'], b'application/x-www-form-urlencoded')
fs = _qs(r1)
@ -1218,7 +1220,7 @@ class XmlRpcRequestTest(RequestTest):
r = self.request_class('http://scrapytest.org/rpc2', **kwargs)
self.assertEqual(r.headers[b'Content-Type'], b'text/xml')
self.assertEqual(r.body,
to_bytes(xmlrpclib.dumps(**kwargs),
to_bytes(xmlrpc.client.dumps(**kwargs),
encoding=kwargs.get('encoding', 'utf-8')))
self.assertEqual(r.method, 'POST')
self.assertEqual(r.encoding, kwargs.get('encoding', 'utf-8'))


@ -532,7 +532,7 @@ class XmlResponseTest(TextResponseTest):
r2 = self.response_class("http://www.example.com", body=body)
self._assert_response_values(r2, 'iso-8859-1', body)
# make sure replace() preserves the explicit encoding passed in the constructor
# make sure replace() preserves the explicit encoding passed in the __init__ method
body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
r3 = self.response_class("http://www.example.com", body=body, encoding='utf-8')
body2 = b"New body"


@ -239,7 +239,7 @@ class ItemTest(unittest.TestCase):
def test_copy(self):
class TestItem(Item):
name = Field()
item = TestItem({'name':'lower'})
item = TestItem({'name': 'lower'})
copied_item = item.copy()
self.assertNotEqual(id(item), id(copied_item))
copied_item['name'] = copied_item['name'].upper()


@ -43,6 +43,6 @@ class LinkTest(unittest.TestCase):
l2 = eval(repr(l1))
self._assert_same_links(l1, l2)
def test_non_str_url_py2(self):
def test_bytes_url(self):
with self.assertRaises(TypeError):
Link(b"http://www.example.com/\xc2\xa3")


@ -322,7 +322,7 @@ class Base:
Link(url=page4_url, text=u'href with whitespaces'),
])
lx = self.extractor_cls(attrs=("href","src"), tags=("a","area","img"), deny_extensions=())
lx = self.extractor_cls(attrs=("href", "src"), tags=("a", "area", "img"), deny_extensions=())
self.assertEqual(lx.extract_links(self.response), [
Link(url='http://example.com/sample1.html', text=u''),
Link(url='http://example.com/sample2.html', text=u'sample 2'),
@ -360,7 +360,7 @@ class Base:
Link(url='http://example.com/sample2.html', text=u'sample 2'),
])
lx = self.extractor_cls(tags=("a","img"), attrs=("href", "src"), deny_extensions=())
lx = self.extractor_cls(tags=("a", "img"), attrs=("href", "src"), deny_extensions=())
self.assertEqual(lx.extract_links(response), [
Link(url='http://example.com/sample2.html', text=u'sample 2'),
Link(url='http://example.com/sample2.jpg', text=u''),

View File

@ -725,11 +725,11 @@ class SelectortemLoaderTest(unittest.TestCase):
</html>
""")
def test_constructor(self):
def test_init_method(self):
l = TestItemLoader()
self.assertEqual(l.selector, None)
def test_constructor_errors(self):
def test_init_method_errors(self):
l = TestItemLoader()
self.assertRaises(RuntimeError, l.add_xpath, 'url', '//a/@href')
self.assertRaises(RuntimeError, l.replace_xpath, 'url', '//a/@href')
@ -738,7 +738,7 @@ class SelectortemLoaderTest(unittest.TestCase):
self.assertRaises(RuntimeError, l.replace_css, 'name', '#name::text')
self.assertRaises(RuntimeError, l.get_css, '#name::text')
def test_constructor_with_selector(self):
def test_init_method_with_selector(self):
sel = Selector(text=u"<html><body><div>marta</div></body></html>")
l = TestItemLoader(selector=sel)
self.assertIs(l.selector, sel)
@ -746,7 +746,7 @@ class SelectortemLoaderTest(unittest.TestCase):
l.add_xpath('name', '//div/text()')
self.assertEqual(l.get_output_value('name'), [u'Marta'])
def test_constructor_with_selector_css(self):
def test_init_method_with_selector_css(self):
sel = Selector(text=u"<html><body><div>marta</div></body></html>")
l = TestItemLoader(selector=sel)
self.assertIs(l.selector, sel)
@ -754,14 +754,14 @@ class SelectortemLoaderTest(unittest.TestCase):
l.add_css('name', 'div::text')
self.assertEqual(l.get_output_value('name'), [u'Marta'])
def test_constructor_with_response(self):
def test_init_method_with_response(self):
l = TestItemLoader(response=self.response)
self.assertTrue(l.selector)
l.add_xpath('name', '//div/text()')
self.assertEqual(l.get_output_value('name'), [u'Marta'])
def test_constructor_with_response_css(self):
def test_init_method_with_response_css(self):
l = TestItemLoader(response=self.response)
self.assertTrue(l.selector)
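As a self-contained sketch of the same loader API (item class and markup invented for illustration):

from scrapy.http import HtmlResponse
from scrapy.item import Field, Item
from scrapy.loader import ItemLoader

class Person(Item):
    name = Field()

response = HtmlResponse(url='http://example.com',
                        body=b'<html><body><div>marta</div></body></html>')
loader = ItemLoader(item=Person(), response=response)  # or selector=Selector(...)
loader.add_css('name', 'div::text')
loader.add_xpath('name', '//div/text()')
person = loader.load_item()
# person['name'] == ['marta', 'marta'] with the default Identity processors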

View File

@ -22,13 +22,13 @@ class CustomItem(Item):
return "name: %s" % self['name']
class LoggingContribTest(unittest.TestCase):
class LogFormatterTestCase(unittest.TestCase):
def setUp(self):
self.formatter = LogFormatter()
self.spider = Spider('default')
def test_crawled(self):
def test_crawled_with_referer(self):
req = Request("http://www.example.com")
res = Response("http://www.example.com")
logkws = self.formatter.crawled(req, res, self.spider)
@ -36,6 +36,7 @@ class LoggingContribTest(unittest.TestCase):
self.assertEqual(logline,
"Crawled (200) <GET http://www.example.com> (referer: None)")
def test_crawled_without_referer(self):
req = Request("http://www.example.com", headers={'referer': 'http://example.com'})
res = Response("http://www.example.com", flags=['cached'])
logkws = self.formatter.crawled(req, res, self.spider)
@ -44,7 +45,7 @@ class LoggingContribTest(unittest.TestCase):
"Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")
def test_flags_in_request(self):
req = Request("http://www.example.com", flags=['test','flag'])
req = Request("http://www.example.com", flags=['test', 'flag'])
res = Response("http://www.example.com")
logkws = self.formatter.crawled(req, res, self.spider)
logline = logkws['msg'] % logkws['args']
@ -61,6 +62,16 @@ class LoggingContribTest(unittest.TestCase):
assert all(isinstance(x, str) for x in lines)
self.assertEqual(lines, [u"Dropped: \u2018", '{}'])
def test_error(self):
# In practice, the complete traceback is shown by passing the
# 'exc_info' argument to the logging function
item = {'key': 'value'}
exception = Exception()
response = Response("http://www.example.com")
logkws = self.formatter.error(item, exception, response, self.spider)
logline = logkws['msg'] % logkws['args']
self.assertEqual(logline, u"'Error processing {'key': 'value'}'")
def test_scraped(self):
item = CustomItem()
item['name'] = u'\xa3'
@ -74,26 +85,46 @@ class LoggingContribTest(unittest.TestCase):
class LogFormatterSubclass(LogFormatter):
def crawled(self, request, response, spider):
kwargs = super(LogFormatterSubclass, self).crawled(
request, response, spider)
kwargs = super(LogFormatterSubclass, self).crawled(request, response, spider)
CRAWLEDMSG = (
u"Crawled (%(status)s) %(request)s (referer: "
u"%(referer)s)%(flags)s"
u"Crawled (%(status)s) %(request)s (referer: %(referer)s) %(flags)s"
)
log_args = kwargs['args']
log_args['flags'] = str(request.flags)
return {
'level': kwargs['level'],
'msg': CRAWLEDMSG,
'args': kwargs['args']
'args': log_args,
}
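A formatter subclass such as this one would typically be enabled through the LOG_FORMATTER setting; the module path below is made up:

# settings.py (hypothetical project layout)
LOG_FORMATTER = 'myproject.logformatters.LogFormatterSubclass'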
class LogformatterSubclassTest(LoggingContribTest):
class LogformatterSubclassTest(LogFormatterTestCase):
def setUp(self):
self.formatter = LogFormatterSubclass()
self.spider = Spider('default')
def test_crawled_with_referer(self):
req = Request("http://www.example.com")
res = Response("http://www.example.com")
logkws = self.formatter.crawled(req, res, self.spider)
logline = logkws['msg'] % logkws['args']
self.assertEqual(logline,
"Crawled (200) <GET http://www.example.com> (referer: None) []")
def test_crawled_without_referer(self):
req = Request("http://www.example.com", headers={'referer': 'http://example.com'}, flags=['cached'])
res = Response("http://www.example.com")
logkws = self.formatter.crawled(req, res, self.spider)
logline = logkws['msg'] % logkws['args']
self.assertEqual(logline,
"Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")
def test_flags_in_request(self):
pass
req = Request("http://www.example.com", flags=['test', 'flag'])
res = Response("http://www.example.com")
logkws = self.formatter.crawled(req, res, self.spider)
logline = logkws['msg'] % logkws['args']
self.assertEqual(logline, "Crawled (200) <GET http://www.example.com> (referer: None) ['test', 'flag']")
class SkipMessagesLogFormatter(LogFormatter):

View File

@ -41,12 +41,12 @@ class SpiderTest(unittest.TestCase):
self.assertEqual(list(start_requests), [])
def test_spider_args(self):
"""Constructor arguments are assigned to spider attributes"""
"""``__init__`` method arguments are assigned to spider attributes"""
spider = self.spider_class('example.com', foo='bar')
self.assertEqual(spider.foo, 'bar')
def test_spider_without_name(self):
"""Constructor arguments are assigned to spider attributes"""
"""``__init__`` method arguments are assigned to spider attributes"""
self.assertRaises(ValueError, self.spider_class)
self.assertRaises(ValueError, self.spider_class, somearg='foo')

View File

@ -79,7 +79,7 @@ class BuildComponentListTest(unittest.TestCase):
self.assertRaises(ValueError, build_component_list, {}, d, convert=lambda x: x)
d = {'one': {'a': 'a', 'b': 2}}
self.assertRaises(ValueError, build_component_list, {}, d, convert=lambda x: x)
d = {'one': 'lorem ipsum',}
d = {'one': 'lorem ipsum'}
self.assertRaises(ValueError, build_component_list, {}, d, convert=lambda x: x)
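For reference, a sketch of the call these assertions exercise — values act as numeric priorities (or None), and non-numeric values are rejected:

from scrapy.utils.conf import build_component_list

build_component_list({}, {'one': 100, 'two': 50}, convert=lambda x: x)
# -> ['two', 'one'], ordered by priority
build_component_list({}, {'one': 'lorem ipsum'}, convert=lambda x: x)
# -> raises ValueError, as asserted above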

View File

@ -21,7 +21,7 @@ class UtilsConsoleTestCase(unittest.TestCase):
shell = get_shell_embed_func(['invalid'])
self.assertEqual(shell, None)
shell = get_shell_embed_func(['invalid','python'])
shell = get_shell_embed_func(['invalid', 'python'])
self.assertTrue(callable(shell))
self.assertEqual(shell.__name__, '_embed_standard_shell')

View File

@ -1,8 +1,8 @@
from collections.abc import Mapping, MutableMapping
import copy
import unittest
from collections.abc import Mapping, MutableMapping
from scrapy.utils.datatypes import CaselessDict, SequenceExclude
from scrapy.utils.datatypes import CaselessDict, LocalCache, SequenceExclude
__doctests__ = ['scrapy.utils.datatypes']
@ -229,5 +229,31 @@ class SequenceExcludeTest(unittest.TestCase):
for v in [-3, "test", 1.1]:
self.assertNotIn(v, d)
class LocalCacheTest(unittest.TestCase):
def test_cache_with_limit(self):
cache = LocalCache(limit=2)
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3
self.assertEqual(len(cache), 2)
self.assertNotIn('a', cache)
self.assertIn('b', cache)
self.assertIn('c', cache)
self.assertEqual(cache['b'], 2)
self.assertEqual(cache['c'], 3)
def test_cache_without_limit(self):
maximum = 10**4
cache = LocalCache()
for x in range(maximum):
cache[str(x)] = x
self.assertEqual(len(cache), maximum)
for x in range(maximum):
self.assertIn(str(x), cache)
self.assertEqual(cache[str(x)], x)
if __name__ == "__main__":
unittest.main()
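The behaviour pinned down by these tests amounts to an insertion-ordered mapping that evicts its oldest entries once a limit is set. A minimal sketch of that idea (not the actual scrapy.utils.datatypes implementation) could look like:

from collections import OrderedDict

class BoundedCache(OrderedDict):
    """Illustrative only: drop the oldest entries once `limit` is reached."""

    def __init__(self, limit=None):
        super().__init__()
        self.limit = limit

    def __setitem__(self, key, value):
        if self.limit:
            while len(self) >= self.limit:
                self.popitem(last=False)  # evict the oldest insertion
        super().__setitem__(key, value)

cache = BoundedCache(limit=2)
cache['a'], cache['b'], cache['c'] = 1, 2, 3
assert list(cache) == ['b', 'c']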

View File

@ -74,7 +74,7 @@ class UtilsMiscTestCase(unittest.TestCase):
self.assertEqual(list(arg_to_iter(100)), [100])
self.assertEqual(list(arg_to_iter(l for l in 'abc')), ['a', 'b', 'c'])
self.assertEqual(list(arg_to_iter([1, 2, 3])), [1, 2, 3])
self.assertEqual(list(arg_to_iter({'a':1})), [{'a': 1}])
self.assertEqual(list(arg_to_iter({'a': 1})), [{'a': 1}])
self.assertEqual(list(arg_to_iter(TestItem(name="john"))), [TestItem(name="john")])
def test_create_instance(self):

View File

@ -205,10 +205,10 @@ class UtilsPythonTestCase(unittest.TestCase):
self.assertEqual(get_func_args(operator.itemgetter(2)), [])
else:
self.assertEqual(
get_func_args(str.split, True), ['sep', 'maxsplit'])
self.assertEqual(get_func_args(" ".join, True), ['list'])
get_func_args(str.split, stripself=True), ['sep', 'maxsplit'])
self.assertEqual(get_func_args(" ".join, stripself=True), ['list'])
self.assertEqual(
get_func_args(operator.itemgetter(2), True), ['obj'])
get_func_args(operator.itemgetter(2), stripself=True), ['obj'])
def test_without_none_values(self):
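The stripself keyword spells out what the bare positional True used to mean; on a plain method object (hypothetical class below) the effect would be:

from scrapy.utils.python import get_func_args

class Greeter:
    def greet(self, name, punctuation='!'):
        return 'hello ' + name + punctuation

get_func_args(Greeter.greet)                  # ['self', 'name', 'punctuation']
get_func_args(Greeter.greet, stripself=True)  # ['name', 'punctuation']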

13
tox.ini
View File

@ -21,7 +21,7 @@ passenv =
GCS_TEST_FILE_URI
GCS_PROJECT_ID
commands =
py.test --cov=scrapy --cov-report= {posargs:scrapy tests}
py.test --cov=scrapy --cov-report= {posargs:--durations=10 docs scrapy tests}
[testenv:py35]
basepython = python3.5
@ -60,7 +60,14 @@ basepython = python3.8
[testenv:pypy3]
basepython = pypy3
commands =
py.test {posargs:scrapy tests}
py.test {posargs:--durations=10 docs scrapy tests}
[testenv:security]
basepython = python3.8
deps =
bandit
commands =
bandit -r -c .bandit.yml {posargs:scrapy}
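With this environment defined, the same Bandit scan can presumably be reproduced locally by running tox -e security from the repository root.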
[testenv:flake8]
basepython = python3.8
@ -68,7 +75,7 @@ deps =
{[testenv]deps}
pytest-flake8
commands =
py.test --flake8 {posargs:scrapy tests}
py.test --flake8 {posargs:docs scrapy tests}
[docs]
changedir = docs