Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-22 22:43:43 +00:00)

Commit 05785c1c17: Merge remote-tracking branch 'upstream/master' into remove-six-code
.bandit.yml (new file, 16 lines)
@@ -0,0 +1,16 @@
skips:
- B101
- B105
- B303
- B306
- B307
- B311
- B320
- B321
- B402
- B404
- B406
- B410
- B503
- B603
- B605
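For context, Bandit reads the ``skips`` list above via its ``-c``/``--configfile`` option and suppresses those test IDs. A minimal sketch of a runner, assuming Bandit is installed and invoked from the repository root (the target path is illustrative); this is presumably what the new ``TOXENV=security`` job below exercises::

    import subprocess

    # Scan the package recursively, skipping the checks listed in .bandit.yml.
    subprocess.run(
        ['bandit', '-c', '.bandit.yml', '-r', 'scrapy'],
        check=True,
    )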
.travis.yml (filename not captured; inferred from content, indentation reconstructed)
@@ -7,6 +7,8 @@ branches:
     - /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
 matrix:
   include:
+    - env: TOXENV=security
+      python: 3.8
     - env: TOXENV=flake8
       python: 3.8
     - env: TOXENV=pypy3
CODE_OF_CONDUCT.md (filename not captured; inferred from content)
@@ -68,7 +68,7 @@ members of the project's leadership.
 ## Attribution
 
 This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
-available at [http://contributor-covenant.org/version/1/4][version]
+available at [http://contributor-covenant.org/version/1/4][version].
 
 [homepage]: http://contributor-covenant.org
 [version]: http://contributor-covenant.org/version/1/4/
README.rst (18 lines changed)
@@ -34,8 +34,8 @@ Scrapy is a fast high-level web crawling and web scraping framework, used to
 crawl websites and extract structured data from their pages. It can be used for
 a wide range of purposes, from data mining to monitoring and automated testing.
 
-For more information including a list of features check the Scrapy homepage at:
-https://scrapy.org
+Check the Scrapy homepage at https://scrapy.org for more information,
+including a list of features.
 
 Requirements
 ============
@@ -50,8 +50,8 @@ The quick way::
 
     pip install scrapy
 
-For more details see the install section in the documentation:
-https://docs.scrapy.org/en/latest/intro/install.html
+See the install section in the documentation at
+https://docs.scrapy.org/en/latest/intro/install.html for more details.
 
 Documentation
 =============
@@ -62,17 +62,17 @@ directory.
 Releases
 ========
 
-You can find release notes at https://docs.scrapy.org/en/latest/news.html
+You can check https://docs.scrapy.org/en/latest/news.html for the release notes.
 
 Community (blog, twitter, mail list, IRC)
 =========================================
 
-See https://scrapy.org/community/
+See https://scrapy.org/community/ for details.
 
 Contributing
 ============
 
-See https://docs.scrapy.org/en/master/contributing.html
+See https://docs.scrapy.org/en/master/contributing.html for details.
 
 Code of Conduct
 ---------------
@@ -86,9 +86,9 @@ Please report unacceptable behavior to opensource@scrapinghub.com.
 Companies using Scrapy
 ======================
 
-See https://scrapy.org/companies/
+See https://scrapy.org/companies/ for a list.
 
 Commercial Support
 ==================
 
-See https://scrapy.org/support/
+See https://scrapy.org/support/ for details.
docs/_tests/quotes1.html (new file, 281 lines; whitespace reconstructed)
@@ -0,0 +1,281 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Quotes to Scrape</title>
    <link rel="stylesheet" href="/static/bootstrap.min.css">
    <link rel="stylesheet" href="/static/main.css">
</head>
<body>
    <div class="container">
        <div class="row header-box">
            <div class="col-md-8">
                <h1>
                    <a href="/" style="text-decoration: none">Quotes to Scrape</a>
                </h1>
            </div>
            <div class="col-md-4">
                <p>

                    <a href="/login">Login</a>

                </p>
            </div>
        </div>


        <div class="row">
            <div class="col-md-8">

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”</span>
                    <span>by <small class="author" itemprop="author">Albert Einstein</small>
                    <a href="/author/Albert-Einstein">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="change,deep-thoughts,thinking,world" / >

                        <a class="tag" href="/tag/change/page/1/">change</a>

                        <a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>

                        <a class="tag" href="/tag/thinking/page/1/">thinking</a>

                        <a class="tag" href="/tag/world/page/1/">world</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“It is our choices, Harry, that show what we truly are, far more than our abilities.”</span>
                    <span>by <small class="author" itemprop="author">J.K. Rowling</small>
                    <a href="/author/J-K-Rowling">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="abilities,choices" / >

                        <a class="tag" href="/tag/abilities/page/1/">abilities</a>

                        <a class="tag" href="/tag/choices/page/1/">choices</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”</span>
                    <span>by <small class="author" itemprop="author">Albert Einstein</small>
                    <a href="/author/Albert-Einstein">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="inspirational,life,live,miracle,miracles" / >

                        <a class="tag" href="/tag/inspirational/page/1/">inspirational</a>

                        <a class="tag" href="/tag/life/page/1/">life</a>

                        <a class="tag" href="/tag/live/page/1/">live</a>

                        <a class="tag" href="/tag/miracle/page/1/">miracle</a>

                        <a class="tag" href="/tag/miracles/page/1/">miracles</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”</span>
                    <span>by <small class="author" itemprop="author">Jane Austen</small>
                    <a href="/author/Jane-Austen">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="aliteracy,books,classic,humor" / >

                        <a class="tag" href="/tag/aliteracy/page/1/">aliteracy</a>

                        <a class="tag" href="/tag/books/page/1/">books</a>

                        <a class="tag" href="/tag/classic/page/1/">classic</a>

                        <a class="tag" href="/tag/humor/page/1/">humor</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”</span>
                    <span>by <small class="author" itemprop="author">Marilyn Monroe</small>
                    <a href="/author/Marilyn-Monroe">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="be-yourself,inspirational" / >

                        <a class="tag" href="/tag/be-yourself/page/1/">be-yourself</a>

                        <a class="tag" href="/tag/inspirational/page/1/">inspirational</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“Try not to become a man of success. Rather become a man of value.”</span>
                    <span>by <small class="author" itemprop="author">Albert Einstein</small>
                    <a href="/author/Albert-Einstein">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="adulthood,success,value" / >

                        <a class="tag" href="/tag/adulthood/page/1/">adulthood</a>

                        <a class="tag" href="/tag/success/page/1/">success</a>

                        <a class="tag" href="/tag/value/page/1/">value</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“It is better to be hated for what you are than to be loved for what you are not.”</span>
                    <span>by <small class="author" itemprop="author">André Gide</small>
                    <a href="/author/Andre-Gide">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="life,love" / >

                        <a class="tag" href="/tag/life/page/1/">life</a>

                        <a class="tag" href="/tag/love/page/1/">love</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“I have not failed. I've just found 10,000 ways that won't work.”</span>
                    <span>by <small class="author" itemprop="author">Thomas A. Edison</small>
                    <a href="/author/Thomas-A-Edison">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="edison,failure,inspirational,paraphrased" / >

                        <a class="tag" href="/tag/edison/page/1/">edison</a>

                        <a class="tag" href="/tag/failure/page/1/">failure</a>

                        <a class="tag" href="/tag/inspirational/page/1/">inspirational</a>

                        <a class="tag" href="/tag/paraphrased/page/1/">paraphrased</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“A woman is like a tea bag; you never know how strong it is until it's in hot water.”</span>
                    <span>by <small class="author" itemprop="author">Eleanor Roosevelt</small>
                    <a href="/author/Eleanor-Roosevelt">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="misattributed-eleanor-roosevelt" / >

                        <a class="tag" href="/tag/misattributed-eleanor-roosevelt/page/1/">misattributed-eleanor-roosevelt</a>

                    </div>
                </div>

                <div class="quote" itemscope itemtype="http://schema.org/CreativeWork">
                    <span class="text" itemprop="text">“A day without sunshine is like, you know, night.”</span>
                    <span>by <small class="author" itemprop="author">Steve Martin</small>
                    <a href="/author/Steve-Martin">(about)</a>
                    </span>
                    <div class="tags">
                        Tags:
                        <meta class="keywords" itemprop="keywords" content="humor,obvious,simile" / >

                        <a class="tag" href="/tag/humor/page/1/">humor</a>

                        <a class="tag" href="/tag/obvious/page/1/">obvious</a>

                        <a class="tag" href="/tag/simile/page/1/">simile</a>

                    </div>
                </div>

                <nav>
                    <ul class="pager">


                        <li class="next">
                            <a href="/page/2/">Next <span aria-hidden="true">→</span></a>
                        </li>

                    </ul>
                </nav>
            </div>
            <div class="col-md-4 tags-box">

                <h2>Top Ten tags</h2>

                <span class="tag-item">
                <a class="tag" style="font-size: 28px" href="/tag/love/">love</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 26px" href="/tag/inspirational/">inspirational</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 26px" href="/tag/life/">life</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 24px" href="/tag/humor/">humor</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 22px" href="/tag/books/">books</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 14px" href="/tag/reading/">reading</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 10px" href="/tag/friendship/">friendship</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 8px" href="/tag/friends/">friends</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 8px" href="/tag/truth/">truth</a>
                </span>

                <span class="tag-item">
                <a class="tag" style="font-size: 6px" href="/tag/simile/">simile</a>
                </span>


            </div>
        </div>

    </div>
    <footer class="footer">
        <div class="container">
            <p class="text-muted">
                Quotes by: <a href="https://www.goodreads.com/quotes">GoodReads.com</a>
            </p>
            <p class="copyright">
                Made with <span class='sh-red'>❤</span> by <a href="https://scrapinghub.com">Scrapinghub</a>
            </p>
        </div>
    </footer>
</body>
</html>
docs/conf.py (filename not captured; inferred from content)
@@ -27,10 +27,12 @@ sys.path.insert(0, path.dirname(path.dirname(__file__)))
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
+    'notfound.extension',
     'scrapydocs',
     'sphinx.ext.autodoc',
     'sphinx.ext.coverage',
     'sphinx.ext.intersphinx',
+    'sphinx.ext.viewcode',
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -237,7 +239,7 @@ coverage_ignore_pyobjects = [
     r'\bContractsManager\b$',
 
     # For default contracts we only want to document their general purpose in
-    # their constructor, the methods they reimplement to achieve that purpose
+    # their __init__ method, the methods they reimplement to achieve that purpose
     # should be irrelevant to developers using those contracts.
     r'\w+Contract\.(adjust_request_args|(pre|post)_process)$',
 
@@ -273,4 +275,5 @@ coverage_ignore_pyobjects = [
 
 intersphinx_mapping = {
     'python': ('https://docs.python.org/3', None),
+    'sphinx': ('https://www.sphinx-doc.org/en/stable', None),
 }
docs/conftest.py (new file, 29 lines)
@@ -0,0 +1,29 @@
import os
from doctest import ELLIPSIS, NORMALIZE_WHITESPACE

from scrapy.http.response.html import HtmlResponse
from sybil import Sybil
from sybil.parsers.codeblock import CodeBlockParser
from sybil.parsers.doctest import DocTestParser
from sybil.parsers.skip import skip


def load_response(url, filename):
    input_path = os.path.join(os.path.dirname(__file__), '_tests', filename)
    with open(input_path, 'rb') as input_file:
        return HtmlResponse(url, body=input_file.read())


def setup(namespace):
    namespace['load_response'] = load_response


pytest_collect_file = Sybil(
    parsers=[
        DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
        CodeBlockParser(future_imports=['print_function']),
        skip,
    ],
    pattern='*.rst',
    setup=setup,
).pytest()
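The ``load_response`` helper above is what lets the documentation's doctests run against the saved fixture instead of the live site. A minimal sketch of what a doctest can do once Sybil injects the helper into its namespace (URL and filename as used in the tutorial change below)::

    response = load_response('http://quotes.toscrape.com/page/1/', 'quotes1.html')
    response.css('title::text').get()  # -> 'Quotes to Scrape'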
docs/contributing.rst (filename not captured; inferred from content)
@@ -177,20 +177,19 @@ Documentation policies
 ======================
 
 For reference documentation of API members (classes, methods, etc.) use
-docstrings and make sure that the Sphinx documentation uses the autodoc_
-extension to pull the docstrings. API reference documentation should follow
-docstring conventions (`PEP 257`_) and be IDE-friendly: short, to the point,
-and it may provide short examples.
+docstrings and make sure that the Sphinx documentation uses the
+:mod:`~sphinx.ext.autodoc` extension to pull the docstrings. API reference
+documentation should follow docstring conventions (`PEP 257`_) and be
+IDE-friendly: short, to the point, and it may provide short examples.
 
 Other types of documentation, such as tutorials or topics, should be covered in
 files within the ``docs/`` directory. This includes documentation that is
 specific to an API member, but goes beyond API reference documentation.
 
-In any case, if something is covered in a docstring, use the autodoc_
-extension to pull the docstring into the documentation instead of duplicating
-the docstring in files within the ``docs/`` directory.
-
-.. _autodoc: http://www.sphinx-doc.org/en/stable/ext/autodoc.html
+In any case, if something is covered in a docstring, use the
+:mod:`~sphinx.ext.autodoc` extension to pull the docstring into the
+documentation instead of duplicating the docstring in files within the
+``docs/`` directory.
 
 Tests
 =====
docs/intro/tutorial.rst (filename not captured; inferred from content)
@@ -235,13 +235,16 @@ You will see something like::
     [s]   shelp()           Shell help (print this help)
     [s]   fetch(req_or_url) Fetch request (or URL) and update local objects
     [s]   view(response)    View response in a browser
-    >>>
 
 Using the shell, you can try selecting elements using `CSS`_ with the response
-object::
+object:
 
->>> response.css('title')
-[<Selector xpath='descendant-or-self::title' data='<title>Quotes to Scrape</title>'>]
+.. invisible-code-block: python
+
+    response = load_response('http://quotes.toscrape.com/page/1/', 'quotes1.html')
+
+>>> response.css('title')
+[<Selector xpath='descendant-or-self::title' data='<title>Quotes to Scrape</title>'>]
 
 The result of running ``response.css('title')`` is a list-like object called
 :class:`~scrapy.selector.SelectorList`, which represents a list of
@@ -372,6 +375,9 @@ we want::
 We get a list of selectors for the quote HTML elements with::
 
     >>> response.css("div.quote")
+    [<Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>,
+     <Selector xpath="descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' quote ')]" data='<div class="quote" itemscope itemtype...'>,
+     ...]
 
 Each of the selectors returned by the query above allows us to run further
 queries over their sub-elements. Let's assign the first selector to a
@@ -396,6 +402,12 @@ to get all of them::
     >>> tags
     ['change', 'deep-thoughts', 'thinking', 'world']
 
+.. invisible-code-block: python
+
+    from sys import version_info
+
+.. skip: next if(version_info < (3, 6), reason="Only Python 3.6+ dictionaries match the output")
+
 Having figured out how to extract each bit, we can now iterate over all the
 quotes elements and put them together into a Python dictionary::
@@ -404,10 +416,9 @@ quotes elements and put them together into a Python dictionary::
     ...     author = quote.css("small.author::text").get()
     ...     tags = quote.css("div.tags a.tag::text").getall()
     ...     print(dict(text=text, author=author, tags=tags))
-    {'tags': ['change', 'deep-thoughts', 'thinking', 'world'], 'author': 'Albert Einstein', 'text': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”'}
-    {'tags': ['abilities', 'choices'], 'author': 'J.K. Rowling', 'text': '“It is our choices, Harry, that show what we truly are, far more than our abilities.”'}
-    ... a few more of these, omitted for brevity
-    >>>
+    {'text': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”', 'author': 'Albert Einstein', 'tags': ['change', 'deep-thoughts', 'thinking', 'world']}
+    {'text': '“It is our choices, Harry, that show what we truly are, far more than our abilities.”', 'author': 'J.K. Rowling', 'tags': ['abilities', 'choices']}
+    ...
 
 Extracting data in our spider
 -----------------------------
@@ -521,7 +532,7 @@ There is also an ``attrib`` property available
 (see :ref:`selecting-attributes` for more)::
 
     >>> response.css('li.next a').attrib['href']
-    '/page/2'
+    '/page/2/'
 
 Let's see now our spider modified to recursively follow the link to the next
 page, extracting data from it::
docs/news.rst (filename not captured; inferred from content)
@@ -308,12 +308,12 @@ New features
   convenient way to build JSON requests (:issue:`3504`, :issue:`3505`)
 
 * A ``process_request`` callback passed to the :class:`~scrapy.spiders.Rule`
-  constructor now receives the :class:`~scrapy.http.Response` object that
+  ``__init__`` method now receives the :class:`~scrapy.http.Response` object that
   originated the request as its second argument (:issue:`3682`)
 
 * A new ``restrict_text`` parameter for the
   :attr:`LinkExtractor <scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor>`
-  constructor allows filtering links by linking text (:issue:`3622`,
+  ``__init__`` method allows filtering links by linking text (:issue:`3622`,
   :issue:`3635`)
 
 * A new :setting:`FEED_STORAGE_S3_ACL` setting allows defining a custom ACL
@@ -479,7 +479,7 @@ The following deprecated APIs have been removed (:issue:`3578`):
 
 * From :class:`~scrapy.selector.Selector`:
 
-  * ``_root`` (both the constructor argument and the object property, use
+  * ``_root`` (both the ``__init__`` method argument and the object property, use
     ``root``)
 
   * ``extract_unquoted`` (use ``getall``)
@@ -2703,7 +2703,7 @@ Scrapy changes:
 - removed ``ENCODING_ALIASES`` setting, as encoding auto-detection has been moved to the `w3lib`_ library
 - promoted :ref:`topics-djangoitem` to main contrib
 - LogFormatter method now return dicts(instead of strings) to support lazy formatting (:issue:`164`, :commit:`dcef7b0`)
-- downloader handlers (:setting:`DOWNLOAD_HANDLERS` setting) now receive settings as the first argument of the constructor
+- downloader handlers (:setting:`DOWNLOAD_HANDLERS` setting) now receive settings as the first argument of the ``__init__`` method
 - replaced memory usage acounting with (more portable) `resource`_ module, removed ``scrapy.utils.memory`` module
 - removed signal: ``scrapy.mail.mail_sent``
 - removed ``TRACK_REFS`` setting, now :ref:`trackrefs <topics-leaks-trackrefs>` is always enabled
@@ -2917,7 +2917,7 @@ API changes
 - ``Request.copy()`` and ``Request.replace()`` now also copies their ``callback`` and ``errback`` attributes (#231)
 - Removed ``UrlFilterMiddleware`` from ``scrapy.contrib`` (already disabled by default)
 - Offsite middelware doesn't filter out any request coming from a spider that doesn't have a allowed_domains attribute (#225)
-- Removed Spider Manager ``load()`` method. Now spiders are loaded in the constructor itself.
+- Removed Spider Manager ``load()`` method. Now spiders are loaded in the ``__init__`` method itself.
 - Changes to Scrapy Manager (now called "Crawler"):
   - ``scrapy.core.manager.ScrapyManager`` class renamed to ``scrapy.crawler.Crawler``
   - ``scrapy.core.manager.scrapymanager`` singleton moved to ``scrapy.project.crawler``
docs/requirements.txt (filename not captured; inferred from content)
@@ -1,2 +1,3 @@
 Sphinx>=2.1
-sphinx_rtd_theme
+sphinx-notfound-page
+sphinx_rtd_theme
docs/topics/email.rst (filename not captured; inferred from content)
@@ -21,7 +21,7 @@ Quick example
 =============
 
 There are two ways to instantiate the mail sender. You can instantiate it using
-the standard constructor::
+the standard ``__init__`` method::
 
     from scrapy.mail import MailSender
     mailer = MailSender()
@@ -111,7 +111,7 @@ uses `Twisted non-blocking IO`_, like the rest of the framework.
 Mail settings
 =============
 
-These settings define the default constructor values of the :class:`MailSender`
+These settings define the default ``__init__`` method values of the :class:`MailSender`
 class, and can be used to configure e-mail notifications in your project without
 writing any code (for those extensions and code that uses :class:`MailSender`).
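Besides the plain ``__init__`` path shown above, :class:`MailSender` can also be built from these settings; a minimal sketch, assuming a Scrapy ``settings`` object is in scope::

    from scrapy.mail import MailSender

    mailer = MailSender.from_settings(settings)  # ``settings`` assumed to exist
    mailer.send(to=['someone@example.com'], subject='Some subject', body='Some body')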
docs/topics/exporters.rst (filename not captured; inferred from content)
@@ -87,8 +87,8 @@ described next.
(intra-line change not recoverable from the capture; both sides read identically)
 1. Declaring a serializer in the field
 --------------------------------------
 
 If you use :class:`~.Item` you can declare a serializer in the
 :ref:`field metadata <topics-items-fields>`. The serializer must be
 a callable which receives a value and returns its serialized form.
 
 Example::
@@ -144,7 +144,7 @@ BaseItemExporter
 defining what fields to export, whether to export empty fields, or which
 encoding to use.
 
-These features can be configured through the constructor arguments which
+These features can be configured through the ``__init__`` method arguments which
 populate their respective instance attributes: :attr:`fields_to_export`,
 :attr:`export_empty_fields`, :attr:`encoding`, :attr:`indent`.
 
@@ -246,8 +246,8 @@ XmlItemExporter
 :param item_element: The name of each item element in the exported XML.
 :type item_element: str
 
-The additional keyword arguments of this constructor are passed to the
-:class:`BaseItemExporter` constructor.
+The additional keyword arguments of this ``__init__`` method are passed to the
+:class:`BaseItemExporter` ``__init__`` method.
 
 A typical output of this exporter would be::
@@ -306,9 +306,9 @@ CsvItemExporter
 multi-valued fields, if found.
 :type include_headers_line: str
 
-The additional keyword arguments of this constructor are passed to the
-:class:`BaseItemExporter` constructor, and the leftover arguments to the
-`csv.writer`_ constructor, so you can use any ``csv.writer`` constructor
+The additional keyword arguments of this ``__init__`` method are passed to the
+:class:`BaseItemExporter` ``__init__`` method, and the leftover arguments to the
+`csv.writer`_ ``__init__`` method, so you can use any ``csv.writer`` ``__init__`` method
 argument to customize this exporter.
 
 A typical output of this exporter would be::
@@ -334,8 +334,8 @@ PickleItemExporter
 
 For more information, refer to the `pickle module documentation`_.
 
-The additional keyword arguments of this constructor are passed to the
-:class:`BaseItemExporter` constructor.
+The additional keyword arguments of this ``__init__`` method are passed to the
+:class:`BaseItemExporter` ``__init__`` method.
 
 Pickle isn't a human readable format, so no output examples are provided.
@@ -351,8 +351,8 @@ PprintItemExporter
 :param file: the file-like object to use for exporting the data. Its ``write`` method should
    accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)
 
-The additional keyword arguments of this constructor are passed to the
-:class:`BaseItemExporter` constructor.
+The additional keyword arguments of this ``__init__`` method are passed to the
+:class:`BaseItemExporter` ``__init__`` method.
 
 A typical output of this exporter would be::
@@ -367,10 +367,10 @@ JsonItemExporter
 .. class:: JsonItemExporter(file, \**kwargs)
 
 Exports Items in JSON format to the specified file-like object, writing all
-objects as a list of objects. The additional constructor arguments are
-passed to the :class:`BaseItemExporter` constructor, and the leftover
-arguments to the `JSONEncoder`_ constructor, so you can use any
-`JSONEncoder`_ constructor argument to customize this exporter.
+objects as a list of objects. The additional ``__init__`` method arguments are
+passed to the :class:`BaseItemExporter` ``__init__`` method, and the leftover
+arguments to the `JSONEncoder`_ ``__init__`` method, so you can use any
+`JSONEncoder`_ ``__init__`` method argument to customize this exporter.
 
 :param file: the file-like object to use for exporting the data. Its ``write`` method should
    accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)
@@ -398,10 +398,10 @@ JsonLinesItemExporter
 .. class:: JsonLinesItemExporter(file, \**kwargs)
 
 Exports Items in JSON format to the specified file-like object, writing one
-JSON-encoded item per line. The additional constructor arguments are passed
-to the :class:`BaseItemExporter` constructor, and the leftover arguments to
-the `JSONEncoder`_ constructor, so you can use any `JSONEncoder`_
-constructor argument to customize this exporter.
+JSON-encoded item per line. The additional ``__init__`` method arguments are passed
+to the :class:`BaseItemExporter` ``__init__`` method, and the leftover arguments to
+the `JSONEncoder`_ ``__init__`` method, so you can use any `JSONEncoder`_
+``__init__`` method argument to customize this exporter.
 
 :param file: the file-like object to use for exporting the data. Its ``write`` method should
    accept ``bytes`` (a disk file opened in binary mode, a ``io.BytesIO`` object, etc)
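To make the argument forwarding described in the exporter entries above concrete, a minimal sketch for :class:`CsvItemExporter` (file name, fields and delimiter are illustrative): ``fields_to_export`` is consumed by ``BaseItemExporter.__init__``, while the leftover ``delimiter`` reaches ``csv.writer``::

    from scrapy.exporters import CsvItemExporter

    # Binary mode, as the :param file: notes above require.
    with open('items.csv', 'wb') as f:
        exporter = CsvItemExporter(f, fields_to_export=['name', 'price'], delimiter=';')
        exporter.start_exporting()
        exporter.export_item({'name': 'Color TV', 'price': '1200'})
        exporter.finish_exporting()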
docs/topics/extensions.rst (filename not captured; inferred from content)
@@ -28,7 +28,7 @@ Loading & activating extensions
 
 Extensions are loaded and activated at startup by instantiating a single
 instance of the extension class. Therefore, all the extension initialization
-code must be performed in the class constructor (``__init__`` method).
+code must be performed in the class ``__init__`` method.
 
 To make an extension available, add it to the :setting:`EXTENSIONS` setting in
 your Scrapy settings. In :setting:`EXTENSIONS`, each extension is represented
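As a sketch of the initialization pattern described above (the class and attribute names are illustrative), everything the extension needs is set up in ``__init__``, with ``from_crawler`` as the conventional factory::

    class SpiderCountExtension:
        """Hypothetical extension: all initialization happens in __init__."""

        def __init__(self, stats):
            self.stats = stats

        @classmethod
        def from_crawler(cls, crawler):
            return cls(crawler.stats)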
docs/topics/items.rst (filename not captured; inferred from content)
@@ -16,12 +16,12 @@ especially in a larger project with many spiders.
(intra-line change not recoverable from the capture; both sides read identically)
 To define common output data format Scrapy provides the :class:`Item` class.
 :class:`Item` objects are simple containers used to collect the scraped data.
 They provide a `dictionary-like`_ API with a convenient syntax for declaring
 their available fields.
 
 Various Scrapy components use extra information provided by Items:
 exporters look at declared fields to figure out columns to export,
 serialization can be customized using Item fields metadata, :mod:`trackref`
 tracks Item instances to help find memory leaks
 (see :ref:`topics-leaks-trackrefs`), etc.
 
 .. _dictionary-like: https://docs.python.org/2/library/stdtypes.html#dict
@@ -237,7 +237,7 @@ Item objects
 
 Return a new Item optionally initialized from the given argument.
 
-Items replicate the standard `dict API`_, including its constructor, and
+Items replicate the standard `dict API`_, including its ``__init__`` method, and
 also provide the following additional API members:
 
 .. automethod:: copy
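A short illustration of the dict-like ``__init__`` mentioned above (the ``Product`` item is hypothetical)::

    import scrapy

    class Product(scrapy.Item):
        name = scrapy.Field()
        price = scrapy.Field()

    # Like dict, an Item can be initialized from keyword arguments or a mapping:
    product = Product(name='Color TV', price=1200)
    product['name']  # -> 'Color TV'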
docs/topics/jobs.rst (filename not captured; inferred from content)
@@ -71,34 +71,11 @@ on cookies.
 Request serialization
 ---------------------
 
-Requests must be serializable by the ``pickle`` module, in order for persistence
-to work, so you should make sure that your requests are serializable.
-
-The most common issue here is to use ``lambda`` functions on request callbacks that
-can't be persisted.
-
-So, for example, this won't work::
-
-    def some_callback(self, response):
-        somearg = 'test'
-        return scrapy.Request('http://www.example.com',
-                              callback=lambda r: self.other_callback(r, somearg))
-
-    def other_callback(self, response, somearg):
-        print("the argument passed is: %s" % somearg)
-
-But this will::
-
-    def some_callback(self, response):
-        somearg = 'test'
-        return scrapy.Request('http://www.example.com',
-                              callback=self.other_callback, cb_kwargs={'somearg': somearg})
-
-    def other_callback(self, response, somearg):
-        print("the argument passed is: %s" % somearg)
+For persistence to work, :class:`~scrapy.http.Request` objects must be
+serializable with :mod:`pickle`, except for the ``callback`` and ``errback``
+values passed to their ``__init__`` method, which must be methods of the
+running :class:`~scrapy.spiders.Spider` class.
 
 If you wish to log the requests that couldn't be serialized, you can set the
 :setting:`SCHEDULER_DEBUG` setting to ``True`` in the project's settings page.
 It is ``False`` by default.
 
-.. _pickle: https://docs.python.org/library/pickle.html
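The rewritten paragraph above condenses the example this hunk removes; as a minimal sketch, a persistence-friendly request uses a spider method as ``callback`` and passes extra data through ``cb_kwargs`` instead of a ``lambda`` (spider name and URLs are illustrative)::

    import scrapy

    class ExampleSpider(scrapy.Spider):
        name = 'example'

        def some_callback(self, response):
            # The callback is a plain method of the running spider, so the
            # request pickles cleanly; the extra argument travels in cb_kwargs.
            yield scrapy.Request('http://www.example.com',
                                 callback=self.other_callback,
                                 cb_kwargs={'somearg': 'test'})

        def other_callback(self, response, somearg):
            self.logger.info('the argument passed is: %s', somearg)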
docs/topics/loaders.rst (filename not captured; inferred from content)
@@ -26,7 +26,7 @@ Using Item Loaders to populate items
 
 To use an Item Loader, you must first instantiate it. You can either
 instantiate it with a dict-like object (e.g. Item or dict) or without one, in
-which case an Item is automatically instantiated in the Item Loader constructor
+which case an Item is automatically instantiated in the Item Loader ``__init__`` method
 using the Item class specified in the :attr:`ItemLoader.default_item_class`
 attribute.
 
@@ -271,7 +271,7 @@ There are several ways to modify Item Loader context values:
     loader.context['unit'] = 'cm'
 
 2. On Item Loader instantiation (the keyword arguments of Item Loader
-   constructor are stored in the Item Loader context)::
+   ``__init__`` method are stored in the Item Loader context)::
 
     loader = ItemLoader(product, unit='cm')
 
@@ -500,7 +500,7 @@ ItemLoader objects
 .. attribute:: default_item_class
 
    An Item class (or factory), used to instantiate items when not given in
-   the constructor.
+   the ``__init__`` method.
 
 .. attribute:: default_input_processor
 
@@ -515,15 +515,15 @@ ItemLoader objects
 .. attribute:: default_selector_class
 
    The class used to construct the :attr:`selector` of this
-   :class:`ItemLoader`, if only a response is given in the constructor.
-   If a selector is given in the constructor this attribute is ignored.
+   :class:`ItemLoader`, if only a response is given in the ``__init__`` method.
+   If a selector is given in the ``__init__`` method this attribute is ignored.
   This attribute is sometimes overridden in subclasses.
 
 .. attribute:: selector
 
   The :class:`~scrapy.selector.Selector` object to extract data from.
-   It's either the selector given in the constructor or one created from
-   the response given in the constructor using the
+   It's either the selector given in the ``__init__`` method or one created from
+   the response given in the ``__init__`` method using the
   :attr:`default_selector_class`. This attribute is meant to be
   read-only.
 
@@ -648,7 +648,7 @@ Here is a list of all built-in processors:
 .. class:: Identity
 
   The simplest processor, which doesn't do anything. It returns the original
-   values unchanged. It doesn't receive any constructor arguments, nor does it
+   values unchanged. It doesn't receive any ``__init__`` method arguments, nor does it
  accept Loader contexts.
 
  Example::
@@ -662,7 +662,7 @@ Here is a list of all built-in processors:
 
  Returns the first non-null/non-empty value from the values received,
  so it's typically used as an output processor to single-valued fields.
-  It doesn't receive any constructor arguments, nor does it accept Loader contexts.
+  It doesn't receive any ``__init__`` method arguments, nor does it accept Loader contexts.
 
  Example::
 
@@ -673,7 +673,7 @@ Here is a list of all built-in processors:
 
 .. class:: Join(separator=u' ')
 
-   Returns the values joined with the separator given in the constructor, which
+   Returns the values joined with the separator given in the ``__init__`` method, which
  defaults to ``u' '``. It doesn't accept Loader contexts.
 
  When using the default separator, this processor is equivalent to the
@@ -711,7 +711,7 @@ Here is a list of all built-in processors:
  those which do, this processor will pass the currently active :ref:`Loader
  context <topics-loaders-context>` through that parameter.
 
-  The keyword arguments passed in the constructor are used as the default
+  The keyword arguments passed in the ``__init__`` method are used as the default
  Loader context values passed to each function call. However, the final
  Loader context values passed to functions are overridden with the currently
  active Loader context accessible through the :meth:`ItemLoader.context`
@@ -755,12 +755,12 @@ Here is a list of all built-in processors:
     ['HELLO, 'THIS', 'IS', 'SCRAPY']
 
  As with the Compose processor, functions can receive Loader contexts, and
-  constructor keyword arguments are used as default context values. See
+  ``__init__`` method keyword arguments are used as default context values. See
  :class:`Compose` processor for more info.
 
 .. class:: SelectJmes(json_path)
 
-   Queries the value using the json path provided to the constructor and returns the output.
+   Queries the value using the json path provided to the ``__init__`` method and returns the output.
  Requires jmespath (https://github.com/jmespath/jmespath.py) to run.
  This processor takes only one input at a time.
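To make the ``__init__``-argument behaviour of the processors above concrete, a minimal sketch (input values are illustrative)::

    from scrapy.loader.processors import Join, TakeFirst

    Join(', ')(['hello', 'scrapy'])    # separator comes from __init__ -> 'hello, scrapy'
    TakeFirst()(['', None, 'first'])   # takes no __init__ arguments  -> 'first'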
docs/topics/logging.rst (filename not captured; inferred from content)
@@ -255,18 +255,18 @@ scrapy.utils.log module
     when running custom scripts using :class:`~scrapy.crawler.CrawlerRunner`.
     In that case, its usage is not required but it's recommended.
 
-    If you plan on configuring the handlers yourself is still recommended you
-    call this function, passing ``install_root_handler=False``. Bear in mind
-    there won't be any log output set by default in that case.
+    Another option when running custom scripts is to manually configure the logging.
+    To do this you can use `logging.basicConfig()`_ to set a basic root handler.
 
-    To get you started on manually configuring logging's output, you can use
-    `logging.basicConfig()`_ to set a basic root handler. This is an example
-    on how to redirect ``INFO`` or higher messages to a file::
+    Note that :class:`~scrapy.crawler.CrawlerProcess` automatically calls ``configure_logging``,
+    so it is recommended to only use `logging.basicConfig()`_ together with
+    :class:`~scrapy.crawler.CrawlerRunner`.
+
+    This is an example on how to redirect ``INFO`` or higher messages to a file::
 
         import logging
         from scrapy.utils.log import configure_logging
 
-        configure_logging(install_root_handler=False)
         logging.basicConfig(
             filename='log.txt',
             format='%(levelname)s: %(message)s',
docs/topics/request-response.rst (filename not captured; inferred from content)
@@ -137,7 +137,7 @@ Request objects
 
     A string containing the URL of this request. Keep in mind that this
     attribute contains the escaped URL, so it can differ from the URL passed in
-    the constructor.
+    the ``__init__`` method.
 
     This attribute is read-only. To change the URL of a Request use
     :meth:`replace`.
@@ -400,7 +400,7 @@ fields with form data from :class:`Response` objects.
 
 .. class:: FormRequest(url, [formdata, ...])
 
-   The :class:`FormRequest` class adds a new keyword parameter to the constructor. The
+   The :class:`FormRequest` class adds a new keyword parameter to the ``__init__`` method. The
    remaining arguments are the same as for the :class:`Request` class and are
    not documented here.
@@ -473,7 +473,7 @@ fields with form data from :class:`Response` objects.
    :type dont_click: boolean
 
    The other parameters of this class method are passed directly to the
-   :class:`FormRequest` constructor.
+   :class:`FormRequest` ``__init__`` method.
 
    .. versionadded:: 0.10.3
       The ``formname`` parameter.
@@ -547,7 +547,7 @@ dealing with JSON requests.
 
 .. class:: JsonRequest(url, [... data, dumps_kwargs])
 
-   The :class:`JsonRequest` class adds two new keyword parameters to the constructor. The
+   The :class:`JsonRequest` class adds two new keyword parameters to the ``__init__`` method. The
    remaining arguments are the same as for the :class:`Request` class and are
    not documented here.
@@ -556,7 +556,7 @@ dealing with JSON requests.
(intra-line change not recoverable from the capture; both sides read identically)
 
    :param data: is any JSON serializable object that needs to be JSON encoded and assigned to body.
      if :attr:`Request.body` argument is provided this parameter will be ignored.
      if :attr:`Request.body` argument is not provided and data argument is provided :attr:`Request.method` will be
      set to ``'POST'`` automatically.
    :type data: JSON serializable object
 
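Mirroring the ``data`` parameter description above, a minimal sketch (URL and payload are illustrative); the object is JSON-encoded into the body and the method defaults to ``'POST'``::

    from scrapy.http import JsonRequest

    request = JsonRequest(url='http://www.example.com/post/action',
                          data={'name1': 'value1', 'name2': 'value2'})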
@ -721,7 +721,7 @@ TextResponse objects
|
|||||||
:class:`Response` class, which is meant to be used only for binary data,
|
:class:`Response` class, which is meant to be used only for binary data,
|
||||||
such as images, sounds or any media file.
|
such as images, sounds or any media file.
|
||||||
|
|
||||||
:class:`TextResponse` objects support a new constructor argument, in
|
:class:`TextResponse` objects support a new ``__init__`` method argument, in
|
||||||
addition to the base :class:`Response` objects. The remaining functionality
|
addition to the base :class:`Response` objects. The remaining functionality
|
||||||
is the same as for the :class:`Response` class and is not documented here.
|
is the same as for the :class:`Response` class and is not documented here.
|
||||||
|
|
||||||
@ -755,7 +755,7 @@ TextResponse objects
|
|||||||
A string with the encoding of this response. The encoding is resolved by
|
A string with the encoding of this response. The encoding is resolved by
|
||||||
trying the following mechanisms, in order:
|
trying the following mechanisms, in order:
|
||||||
|
|
||||||
1. the encoding passed in the constructor ``encoding`` argument
|
1. the encoding passed in the ``__init__`` method ``encoding`` argument
|
||||||
|
|
||||||
2. the encoding declared in the Content-Type HTTP header. If this
|
2. the encoding declared in the Content-Type HTTP header. If this
|
||||||
encoding is not valid (ie. unknown), it is ignored and the next
|
encoding is not valid (ie. unknown), it is ignored and the next
|
||||||
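
A quick illustration of the :class:`JsonRequest` behavior documented above — a minimal, hypothetical sketch (the endpoint URL is made up)::

    from scrapy.http import JsonRequest

    # 'data' is JSON-encoded and assigned to the request body; since no
    # explicit body is given, the method is switched to 'POST' automatically.
    request = JsonRequest(
        url='http://www.example.com/post/action',
        data={'name1': 'value1', 'name2': 'value2'},
    )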
39
pytest.ini
@@ -2,7 +2,24 @@
 usefixtures = chdir
 python_files=test_*.py __init__.py
 python_classes=
-addopts = --doctest-modules --assert=plain
+addopts =
+    --assert=plain
+    --doctest-modules
+    --ignore=docs/_ext
+    --ignore=docs/conf.py
+    --ignore=docs/news.rst
+    --ignore=docs/topics/commands.rst
+    --ignore=docs/topics/debug.rst
+    --ignore=docs/topics/developer-tools.rst
+    --ignore=docs/topics/dynamic-content.rst
+    --ignore=docs/topics/items.rst
+    --ignore=docs/topics/leaks.rst
+    --ignore=docs/topics/loaders.rst
+    --ignore=docs/topics/selectors.rst
+    --ignore=docs/topics/shell.rst
+    --ignore=docs/topics/stats.rst
+    --ignore=docs/topics/telnetconsole.rst
+    --ignore=docs/utils
 twisted = 1
 flake8-ignore =
     # extras
@@ -30,7 +47,7 @@ flake8-ignore =
     scrapy/core/engine.py E261 E501 E128 E127 E306 E502
     scrapy/core/scheduler.py E501
     scrapy/core/scraper.py E501 E306 E261 E128 W504
-    scrapy/core/spidermw.py E501 E731 E502 E231 E126 E226
+    scrapy/core/spidermw.py E501 E731 E502 E126 E226
     scrapy/core/downloader/__init__.py F401 E501
     scrapy/core/downloader/contextfactory.py E501 E128 E126
     scrapy/core/downloader/middleware.py E501 E502
@@ -175,14 +192,14 @@ flake8-ignore =
     tests/test_crawl.py E501 E741 E265
     tests/test_crawler.py F841 E306 E501
     tests/test_dependencies.py E302 F841 E501 E305
-    tests/test_downloader_handlers.py E124 E127 E128 E225 E261 E265 F401 E501 E502 E701 E711 E126 E226 E123
+    tests/test_downloader_handlers.py E124 E127 E128 E225 E261 E265 F401 E501 E502 E701 E126 E226 E123
     tests/test_downloadermiddleware.py E501
     tests/test_downloadermiddleware_ajaxcrawlable.py E302 E501
     tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E303 E265 E126
     tests/test_downloadermiddleware_decompression.py E127
     tests/test_downloadermiddleware_defaultheaders.py E501
     tests/test_downloadermiddleware_downloadtimeout.py E501
-    tests/test_downloadermiddleware_httpcache.py E713 E501 E302 E305 F401
+    tests/test_downloadermiddleware_httpcache.py E501 E302 E305 F401
     tests/test_downloadermiddleware_httpcompression.py E501 F401 E251 E126 E123
     tests/test_downloadermiddleware_httpproxy.py F401 E501 E128
     tests/test_downloadermiddleware_redirect.py E501 E303 E128 E306 E127 E305
@@ -196,13 +213,13 @@ flake8-ignore =
     tests/test_feedexport.py E501 F401 F841 E241
     tests/test_http_cookies.py E501
     tests/test_http_headers.py E302 E501
-    tests/test_http_request.py F401 E402 E501 E231 E261 E127 E128 W293 E502 E128 E502 E126 E123
+    tests/test_http_request.py F401 E402 E501 E261 E127 E128 W293 E502 E128 E502 E126 E123
     tests/test_http_response.py E501 E301 E502 E128 E265
-    tests/test_item.py E701 E128 E231 F841 E306
+    tests/test_item.py E701 E128 F841 E306
     tests/test_link.py E501
-    tests/test_linkextractors.py E501 E128 E231 E124
+    tests/test_linkextractors.py E501 E128 E124
     tests/test_loader.py E302 E501 E731 E303 E741 E128 E117 E241
-    tests/test_logformatter.py E128 E501 E231 E122 E302
+    tests/test_logformatter.py E128 E501 E122 E302
     tests/test_mail.py E302 E128 E501 E305
     tests/test_middleware.py E302 E501 E128
     tests/test_pipeline_crawl.py E131 E501 E128 E126
@@ -221,8 +238,8 @@ flake8-ignore =
     tests/test_spidermiddleware_output_chain.py F401 E501 E302 W293 E226
     tests/test_spidermiddleware_referer.py F401 E501 E302 F841 E125 E201 E261 E124 E501 E241 E121
     tests/test_squeues.py E501 E302 E701 E741
-    tests/test_utils_conf.py E501 E231 E303 E128
+    tests/test_utils_conf.py E501 E303 E128
-    tests/test_utils_console.py E302 E231
+    tests/test_utils_console.py E302
     tests/test_utils_curl.py E501
     tests/test_utils_datatypes.py E402 E501 E305
     tests/test_utils_defer.py E306 E261 E501 E302 F841 E226
@@ -251,4 +268,4 @@ flake8-ignore =
     tests/test_spiderloader/test_spiders/spider2.py E302
     tests/test_spiderloader/test_spiders/spider3.py E302
     tests/test_spiderloader/test_spiders/nested/spider4.py E302
-    tests/test_utils_misc/__init__.py E501 E231
+    tests/test_utils_misc/__init__.py E501
@@ -231,9 +231,9 @@ class Scraper(object):
                 signal=signals.item_dropped, item=item, response=response,
                 spider=spider, exception=output.value)
         else:
-            logger.error('Error processing %(item)s', {'item': item},
-                         exc_info=failure_to_exc_info(output),
-                         extra={'spider': spider})
+            logkws = self.logformatter.error(item, ex, response, spider)
+            logger.log(*logformatter_adapter(logkws), extra={'spider': spider},
+                       exc_info=failure_to_exc_info(output))
         return self.signals.send_catch_log_deferred(
             signal=signals.item_error, item=item, response=response,
             spider=spider, failure=output)
@@ -35,7 +35,7 @@ class SpiderMiddlewareManager(MiddlewareManager):
         self.methods['process_spider_exception'].appendleft(getattr(mw, 'process_spider_exception', None))
 
     def scrape_response(self, scrape_func, response, request, spider):
-        fname = lambda f:'%s.%s' % (
+        fname = lambda f: '%s.%s' % (
             f.__self__.__class__.__name__,
             f.__func__.__name__)
 
@@ -4,9 +4,9 @@ and extract the potentially compressed responses that may arrive.
 
 import bz2
 import gzip
-import zipfile
-import tarfile
 import logging
+import tarfile
+import zipfile
 from io import BytesIO
 from tempfile import mktemp
 
@@ -29,7 +29,7 @@ class BaseItemExporter(object):
     def _configure(self, options, dont_fail=False):
         """Configure the exporter by poping options from the ``options`` dict.
         If dont_fail is set, it won't raise an exception on unexpected options
-        (useful for using with keyword arguments in subclasses constructors)
+        (useful for using with keyword arguments in subclasses ``__init__`` methods)
         """
         self.encoding = options.pop('encoding', None)
         self.fields_to_export = options.pop('fields_to_export', None)
@@ -198,9 +198,9 @@ class FeedExporter(object):
 
     def __init__(self, settings):
         self.settings = settings
-        self.urifmt = settings['FEED_URI']
-        if not self.urifmt:
+        if not settings['FEED_URI']:
             raise NotConfigured
+        self.urifmt = str(settings['FEED_URI'])
         self.format = settings['FEED_FORMAT'].lower()
         self.export_encoding = settings['FEED_EXPORT_ENCODING']
         self.storages = self._load_components('FEED_STORAGES')
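Because ``str()`` is now applied after the ``NotConfigured`` check, a ``pathlib.Path`` becomes acceptable as ``FEED_URI``. A minimal sketch (the file names are made up)::

    from pathlib import Path

    settings = {
        'FEED_FORMAT': 'csv',
        # Previously this had to be a plain string; it is now coerced with str().
        'FEED_URI': Path('exports') / 'items.csv',
    }
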
@@ -65,7 +65,7 @@ class Request(object_ref):
         s = safe_url_string(url, self.encoding)
         self._url = escape_ajax(s)
 
-        if ':' not in self._url:
+        if ('://' not in self._url) and (not self._url.startswith('data:')):
             raise ValueError('Missing scheme in request url: %s' % self._url)
 
     url = property(_get_url, obsolete_setter(_set_url, 'url'))
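The stricter check rejects scheme-less URLs that merely contain a colon, while explicitly allowing ``data:`` URLs, which contain no ``://``. A sketch of the resulting behavior::

    from scrapy import Request

    Request('http://www.example.com/')  # accepted: contains '://'
    Request('data:,Hello%2C%20World!')  # accepted: 'data:' is special-cased

    try:
        # '/foo:bar' slipped through the old "':' not in url" check.
        Request('/foo:bar')
    except ValueError as error:
        print(error)  # Missing scheme in request url: /foo:bar
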
@@ -19,23 +19,26 @@ from scrapy.utils.url import (
 
 # common file extensions that are not followed if they occur in links
 IGNORED_EXTENSIONS = [
+    # archives
+    '7z', '7zip', 'bz2', 'rar', 'tar', 'tar.gz', 'xz', 'zip',
+
     # images
     'mng', 'pct', 'bmp', 'gif', 'jpg', 'jpeg', 'png', 'pst', 'psp', 'tif',
-    'tiff', 'ai', 'drw', 'dxf', 'eps', 'ps', 'svg',
+    'tiff', 'ai', 'drw', 'dxf', 'eps', 'ps', 'svg', 'cdr', 'ico',
 
     # audio
     'mp3', 'wma', 'ogg', 'wav', 'ra', 'aac', 'mid', 'au', 'aiff',
 
     # video
     '3gp', 'asf', 'asx', 'avi', 'mov', 'mp4', 'mpg', 'qt', 'rm', 'swf', 'wmv',
-    'm4a', 'm4v', 'flv',
+    'm4a', 'm4v', 'flv', 'webm',
 
     # office suites
     'xls', 'xlsx', 'ppt', 'pptx', 'pps', 'doc', 'docx', 'odt', 'ods', 'odg',
     'odp',
 
     # other
-    'css', 'pdf', 'exe', 'bin', 'rss', 'zip', 'rar',
+    'css', 'pdf', 'exe', 'bin', 'rss', 'dmg', 'iso', 'apk'
 ]
 
 
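Link extractors use this list as the default for their ``deny_extensions`` argument, so links to the newly listed extensions (archives, ``webm``, ``cdr``, ``ico``, ...) are now skipped automatically. A sketch of opting one extension back in::

    from scrapy.linkextractors import IGNORED_EXTENSIONS, LinkExtractor

    # Follow .webm links again by rebuilding the deny list without that entry.
    follow_webm = LinkExtractor(
        deny_extensions=[ext for ext in IGNORED_EXTENSIONS if ext != 'webm'])
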
@@ -8,6 +8,7 @@ from scrapy.utils.request import referer_str
 SCRAPEDMSG = u"Scraped from %(src)s" + os.linesep + "%(item)s"
 DROPPEDMSG = u"Dropped: %(exception)s" + os.linesep + "%(item)s"
 CRAWLEDMSG = u"Crawled (%(status)s) %(request)s%(request_flags)s (referer: %(referer)s)%(response_flags)s"
+ERRORMSG = u"'Error processing %(item)s'"
 
 
 class LogFormatter(object):
@@ -92,6 +93,16 @@ class LogFormatter(object):
             }
         }
 
+    def error(self, item, exception, response, spider):
+        """Logs a message when an item causes an error while it is passing through the item pipeline."""
+        return {
+            'level': logging.ERROR,
+            'msg': ERRORMSG,
+            'args': {
+                'item': item,
+            }
+        }
+
     @classmethod
     def from_crawler(cls, crawler):
         return cls()
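Since item-error logging now goes through the log formatter (see the scraper change above), projects can customize these messages by overriding the new hook. A minimal sketch (the subclass name is made up), assuming the ``error`` signature introduced here::

    import logging

    from scrapy.logformatter import LogFormatter


    class QuietErrorLogFormatter(LogFormatter):

        def error(self, item, exception, response, spider):
            # Keep the default message and args, but demote it to WARNING.
            logkws = super().error(item, exception, response, spider)
            logkws['level'] = logging.WARNING
            return logkws
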
@@ -86,9 +86,6 @@ class _SlotPriorityQueues(object):
     def __len__(self):
         return sum(len(x) for x in self.pqueues.values()) if self.pqueues else 0
 
-    def __contains__(self, slot):
-        return slot in self.pqueues
-
 
 class ScrapyPriorityQueue(PriorityQueue):
     """
@@ -5,9 +5,10 @@ Python Standard Library.
 This module must not depend on any module outside the Standard Library.
 """
 
-import copy
 import collections
+import copy
 import warnings
+from collections.abc import Mapping
 
 from scrapy.exceptions import ScrapyDeprecationWarning
 
@@ -223,7 +224,7 @@ class CaselessDict(dict):
         return dict.setdefault(self, self.normkey(key), self.normvalue(def_val))
 
     def update(self, seq):
-        seq = seq.items() if isinstance(seq, collections.abc.Mapping) else seq
+        seq = seq.items() if isinstance(seq, Mapping) else seq
         iseq = ((self.normkey(k), self.normvalue(v)) for k, v in seq)
         super(CaselessDict, self).update(iseq)
 
@@ -247,8 +248,9 @@ class LocalCache(collections.OrderedDict):
         self.limit = limit
 
     def __setitem__(self, key, value):
-        while len(self) >= self.limit:
-            self.popitem(last=False)
+        if self.limit:
+            while len(self) >= self.limit:
+                self.popitem(last=False)
         super(LocalCache, self).__setitem__(key, value)
 
 
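The new ``if self.limit`` guard makes an unbounded cache usable: with the old code, ``LocalCache()`` (``limit=None``) raised ``TypeError`` on the ``len(self) >= self.limit`` comparison at the first insertion. For example::

    from scrapy.utils.datatypes import LocalCache

    cache = LocalCache(limit=2)
    cache['a'] = 1
    cache['b'] = 2
    cache['c'] = 3            # evicts 'a', the oldest entry
    assert 'a' not in cache

    unbounded = LocalCache()  # previously failed on the first assignment
    unbounded['key'] = 'value'
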
@@ -296,7 +296,7 @@ class WeakKeyCache(object):
 def stringify_dict(dct_or_tuples, encoding='utf-8', keys_only=True):
     """Return a (new) dict with unicode keys (and values when "keys_only" is
     False) of the given dict converted to strings. ``dct_or_tuples`` can be a
-    dict or a list of tuples, like any dict constructor supports.
+    dict or a list of tuples, like any dict ``__init__`` method supports.
     """
     d = {}
     for k, v in dict(dct_or_tuples).items():
@@ -3,10 +3,10 @@ from twisted.internet import reactor, error
 def listen_tcp(portrange, host, factory):
     """Like reactor.listenTCP but tries different ports in a range."""
     assert len(portrange) <= 2, "invalid portrange: %s" % portrange
-    if not hasattr(portrange, '__iter__'):
-        return reactor.listenTCP(portrange, factory, interface=host)
     if not portrange:
         return reactor.listenTCP(0, factory, interface=host)
+    if not hasattr(portrange, '__iter__'):
+        return reactor.listenTCP(portrange, factory, interface=host)
     if len(portrange) == 1:
         return reactor.listenTCP(portrange[0], factory, interface=host)
     for x in range(portrange[0], portrange[1]+1):
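After this reordering an empty port range is handled before the iterability test. A usage sketch (the factory is a bare placeholder)::

    from twisted.internet import protocol

    from scrapy.utils.reactor import listen_tcp

    factory = protocol.ServerFactory()

    listen_tcp([], 'localhost', factory)            # any free port
    listen_tcp([8080], 'localhost', factory)        # exactly port 8080
    listen_tcp([8080, 8090], 'localhost', factory)  # first free port in range
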
@@ -38,7 +38,7 @@ singletons members of that object, as explained below:
   ``scrapy.core.manager.ExecutionManager``) - instantiated with a ``Settings``
   object
 
-- **crawler.settings**: ``scrapy.conf.Settings`` instance (passed in the constructor)
+- **crawler.settings**: ``scrapy.conf.Settings`` instance (passed in the ``__init__`` method)
 - **crawler.extensions**: ``scrapy.extension.ExtensionManager`` instance
 - **crawler.engine**: ``scrapy.core.engine.ExecutionEngine`` instance
 - ``crawler.engine.scheduler``
@@ -55,7 +55,7 @@ singletons members of that object, as explained below:
   ``STATS_CLASS`` setting)
 - **crawler.log**: Logger class with methods replacing the current
   ``scrapy.log`` functions. Logging would be started (if enabled) on
-  ``Crawler`` constructor, so no log starting functions are required.
+  ``Crawler`` instantiation, so no log starting functions are required.
 
   - ``crawler.log.msg``
 - **crawler.signals**: signal handling
@@ -69,12 +69,12 @@ Required code changes after singletons removal
 ==============================================
 
 All components (extensions, middlewares, etc) will receive this ``Crawler``
-object in their constructors, and this will be the only mechanism for accessing
+object in their ``__init__`` methods, and this will be the only mechanism for accessing
 any other components (as opposed to importing each singleton from their
 respective module). This will also serve to stabilize the core API, something
 which we haven't documented so far (partly because of this).
 
-So, for a typical middleware constructor code, instead of this:
+So, for a typical middleware ``__init__`` method code, instead of this:
 
 ::
 
@@ -125,13 +125,13 @@ Open issues to resolve
 
 - Should we pass ``Settings`` object to ``ScrapyCommand.add_options()``?
 - How should spiders access settings?
-  - Option 1. Pass ``Crawler`` object to spider constructors too
+  - Option 1. Pass ``Crawler`` object to spider ``__init__`` methods too
     - pro: one way to access all components (settings and signals being the
       most relevant to spiders)
     - con?: spider code can access (and control) any crawler component -
       since we don't want to support spiders messing with the crawler (write
       an extension or spider middleware if you need that)
-  - Option 2. Pass ``Settings`` object to spider constructors, which would
+  - Option 2. Pass ``Settings`` object to spider ``__init__`` methods, which would
     then be accessed through ``self.settings``, like logging which is accessed
     through ``self.log``
 
@@ -6,6 +6,7 @@ pytest
 pytest-cov
 pytest-twisted
 pytest-xdist
+sybil
 testfixtures
 
 # optional for shell wrapper tests
@@ -614,7 +614,7 @@ class Http11MockServerTestCase(unittest.TestCase):
         crawler = get_crawler(SingleRequestSpider)
         yield crawler.crawl(seed=Request(url=self.mockserver.url('')))
         failure = crawler.spider.meta.get('failure')
-        self.assertTrue(failure == None)
+        self.assertTrue(failure is None)
         reason = crawler.spider.meta['close_reason']
         self.assertTrue(reason, 'finished')
 
@@ -636,7 +636,7 @@ class Http11MockServerTestCase(unittest.TestCase):
         yield crawler.crawl(seed=request)
         # download_maxsize = 50 is enough for the gzipped response
         failure = crawler.spider.meta.get('failure')
-        self.assertTrue(failure == None)
+        self.assertTrue(failure is None)
         reason = crawler.spider.meta['close_reason']
         self.assertTrue(reason, 'finished')
 
@@ -84,8 +84,8 @@ class _BaseTest(unittest.TestCase):
 
     def assertEqualRequestButWithCacheValidators(self, request1, request2):
         self.assertEqual(request1.url, request2.url)
-        assert not b'If-None-Match' in request1.headers
-        assert not b'If-Modified-Since' in request1.headers
+        assert b'If-None-Match' not in request1.headers
+        assert b'If-Modified-Since' not in request1.headers
         assert any(h in request2.headers for h in (b'If-None-Match', b'If-Modified-Since'))
         self.assertEqual(request1.body, request2.body)
 
@@ -6,6 +6,7 @@ import tempfile
 import shutil
 import string
 from io import BytesIO
+from pathlib import Path
 from unittest import mock
 from urllib.parse import urljoin, urlparse, quote
 from urllib.request import pathname2url
@@ -403,6 +404,7 @@ class FeedExportTest(unittest.TestCase):
         defaults = {
             'FEED_URI': res_uri,
             'FEED_FORMAT': 'csv',
+            'FEED_PATH': res_path
         }
         defaults.update(settings or {})
         try:
@@ -411,7 +413,7 @@ class FeedExportTest(unittest.TestCase):
                 spider_cls.start_urls = [s.url('/')]
                 yield runner.crawl(spider_cls)
 
-            with open(res_path, 'rb') as f:
+            with open(str(defaults['FEED_PATH']), 'rb') as f:
                 content = f.read()
 
         finally:
@@ -841,3 +843,17 @@ class FeedExportTest(unittest.TestCase):
         yield self.exported_data({}, settings)
         self.assertTrue(FromCrawlerCsvItemExporter.init_with_crawler)
         self.assertTrue(FromCrawlerFileFeedStorage.init_with_crawler)
+
+    @defer.inlineCallbacks
+    def test_pathlib_uri(self):
+        tmpdir = tempfile.mkdtemp()
+        feed_uri = Path(tmpdir) / 'res'
+        settings = {
+            'FEED_FORMAT': 'csv',
+            'FEED_STORE_EMPTY': True,
+            'FEED_URI': feed_uri,
+            'FEED_PATH': feed_uri
+        }
+        data = yield self.exported_no_data(settings)
+        self.assertEqual(data, b'')
+        shutil.rmtree(tmpdir, ignore_errors=True)
@@ -3,7 +3,7 @@ import cgi
 import unittest
 import re
 import json
-import xmlrpc.client as xmlrpclib
+import xmlrpc.client
 import warnings
 from unittest import mock
 from urllib.parse import parse_qs, unquote_to_bytes, urlparse
@@ -20,7 +20,7 @@ class RequestTest(unittest.TestCase):
     default_meta = {}
 
     def test_init(self):
-        # Request requires url in the constructor
+        # Request requires url in the __init__ method
        self.assertRaises(Exception, self.request_class)
 
        # url argument must be basestring
@@ -47,11 +47,13 @@ class RequestTest(unittest.TestCase):
 
     def test_url_no_scheme(self):
         self.assertRaises(ValueError, self.request_class, 'foo')
+        self.assertRaises(ValueError, self.request_class, '/foo/')
+        self.assertRaises(ValueError, self.request_class, '/foo:bar')
 
     def test_headers(self):
         # Different ways of setting headers attribute
         url = 'http://www.scrapy.org'
-        headers = {b'Accept':'gzip', b'Custom-Header':'nothing to tell you'}
+        headers = {b'Accept': 'gzip', b'Custom-Header': 'nothing to tell you'}
         r = self.request_class(url=url, headers=headers)
         p = self.request_class(url=url, headers=r.headers)
 
@@ -495,7 +497,7 @@ class FormRequestTest(RequestTest):
                                                formdata=(('foo', 'bar'), ('foo', 'baz')))
         self.assertEqual(urlparse(req.url).hostname, 'www.example.com')
         self.assertEqual(urlparse(req.url).query, 'foo=bar&foo=baz')
 
     def test_from_response_override_duplicate_form_key(self):
         response = _buildresponse(
             """<form action="get.php" method="POST">
@@ -652,7 +654,7 @@ class FormRequestTest(RequestTest):
         req = self.request_class.from_response(response, dont_click=True)
         fs = _qs(req)
         self.assertEqual(fs, {b'i1': [b'i1v'], b'i2': [b'i2v']})
 
     def test_from_response_clickdata_does_not_ignore_image(self):
         response = _buildresponse(
             """<form>
@@ -811,7 +813,7 @@ class FormRequestTest(RequestTest):
             <input type="hidden" name="one" value="1">
             <input type="hidden" name="two" value="2">
             </form>""")
-        r1 = self.request_class.from_response(response, formdata={'two':'3'})
+        r1 = self.request_class.from_response(response, formdata={'two': '3'})
         self.assertEqual(r1.method, 'POST')
         self.assertEqual(r1.headers['Content-type'], b'application/x-www-form-urlencoded')
         fs = _qs(r1)
@@ -1218,7 +1220,7 @@ class XmlRpcRequestTest(RequestTest):
         r = self.request_class('http://scrapytest.org/rpc2', **kwargs)
         self.assertEqual(r.headers[b'Content-Type'], b'text/xml')
         self.assertEqual(r.body,
-                         to_bytes(xmlrpclib.dumps(**kwargs),
+                         to_bytes(xmlrpc.client.dumps(**kwargs),
                                   encoding=kwargs.get('encoding', 'utf-8')))
         self.assertEqual(r.method, 'POST')
         self.assertEqual(r.encoding, kwargs.get('encoding', 'utf-8'))
@@ -532,7 +532,7 @@ class XmlResponseTest(TextResponseTest):
         r2 = self.response_class("http://www.example.com", body=body)
         self._assert_response_values(r2, 'iso-8859-1', body)
 
-        # make sure replace() preserves the explicit encoding passed in the constructor
+        # make sure replace() preserves the explicit encoding passed in the __init__ method
         body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
         r3 = self.response_class("http://www.example.com", body=body, encoding='utf-8')
         body2 = b"New body"
@@ -239,7 +239,7 @@ class ItemTest(unittest.TestCase):
     def test_copy(self):
         class TestItem(Item):
             name = Field()
-        item = TestItem({'name':'lower'})
+        item = TestItem({'name': 'lower'})
         copied_item = item.copy()
         self.assertNotEqual(id(item), id(copied_item))
         copied_item['name'] = copied_item['name'].upper()
@@ -43,6 +43,6 @@ class LinkTest(unittest.TestCase):
         l2 = eval(repr(l1))
         self._assert_same_links(l1, l2)
 
-    def test_non_str_url_py2(self):
+    def test_bytes_url(self):
         with self.assertRaises(TypeError):
             Link(b"http://www.example.com/\xc2\xa3")
@@ -322,7 +322,7 @@ class Base:
             Link(url=page4_url, text=u'href with whitespaces'),
         ])
 
-        lx = self.extractor_cls(attrs=("href","src"), tags=("a","area","img"), deny_extensions=())
+        lx = self.extractor_cls(attrs=("href", "src"), tags=("a", "area", "img"), deny_extensions=())
         self.assertEqual(lx.extract_links(self.response), [
             Link(url='http://example.com/sample1.html', text=u''),
             Link(url='http://example.com/sample2.html', text=u'sample 2'),
@@ -360,7 +360,7 @@ class Base:
             Link(url='http://example.com/sample2.html', text=u'sample 2'),
         ])
 
-        lx = self.extractor_cls(tags=("a","img"), attrs=("href", "src"), deny_extensions=())
+        lx = self.extractor_cls(tags=("a", "img"), attrs=("href", "src"), deny_extensions=())
         self.assertEqual(lx.extract_links(response), [
             Link(url='http://example.com/sample2.html', text=u'sample 2'),
             Link(url='http://example.com/sample2.jpg', text=u''),
|
@ -725,11 +725,11 @@ class SelectortemLoaderTest(unittest.TestCase):
|
|||||||
</html>
|
</html>
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def test_constructor(self):
|
def test_init_method(self):
|
||||||
l = TestItemLoader()
|
l = TestItemLoader()
|
||||||
self.assertEqual(l.selector, None)
|
self.assertEqual(l.selector, None)
|
||||||
|
|
||||||
def test_constructor_errors(self):
|
def test_init_method_errors(self):
|
||||||
l = TestItemLoader()
|
l = TestItemLoader()
|
||||||
self.assertRaises(RuntimeError, l.add_xpath, 'url', '//a/@href')
|
self.assertRaises(RuntimeError, l.add_xpath, 'url', '//a/@href')
|
||||||
self.assertRaises(RuntimeError, l.replace_xpath, 'url', '//a/@href')
|
self.assertRaises(RuntimeError, l.replace_xpath, 'url', '//a/@href')
|
||||||
@ -738,7 +738,7 @@ class SelectortemLoaderTest(unittest.TestCase):
|
|||||||
self.assertRaises(RuntimeError, l.replace_css, 'name', '#name::text')
|
self.assertRaises(RuntimeError, l.replace_css, 'name', '#name::text')
|
||||||
self.assertRaises(RuntimeError, l.get_css, '#name::text')
|
self.assertRaises(RuntimeError, l.get_css, '#name::text')
|
||||||
|
|
||||||
def test_constructor_with_selector(self):
|
def test_init_method_with_selector(self):
|
||||||
sel = Selector(text=u"<html><body><div>marta</div></body></html>")
|
sel = Selector(text=u"<html><body><div>marta</div></body></html>")
|
||||||
l = TestItemLoader(selector=sel)
|
l = TestItemLoader(selector=sel)
|
||||||
self.assertIs(l.selector, sel)
|
self.assertIs(l.selector, sel)
|
||||||
@ -746,7 +746,7 @@ class SelectortemLoaderTest(unittest.TestCase):
|
|||||||
l.add_xpath('name', '//div/text()')
|
l.add_xpath('name', '//div/text()')
|
||||||
self.assertEqual(l.get_output_value('name'), [u'Marta'])
|
self.assertEqual(l.get_output_value('name'), [u'Marta'])
|
||||||
|
|
||||||
def test_constructor_with_selector_css(self):
|
def test_init_method_with_selector_css(self):
|
||||||
sel = Selector(text=u"<html><body><div>marta</div></body></html>")
|
sel = Selector(text=u"<html><body><div>marta</div></body></html>")
|
||||||
l = TestItemLoader(selector=sel)
|
l = TestItemLoader(selector=sel)
|
||||||
self.assertIs(l.selector, sel)
|
self.assertIs(l.selector, sel)
|
||||||
@ -754,14 +754,14 @@ class SelectortemLoaderTest(unittest.TestCase):
|
|||||||
l.add_css('name', 'div::text')
|
l.add_css('name', 'div::text')
|
||||||
self.assertEqual(l.get_output_value('name'), [u'Marta'])
|
self.assertEqual(l.get_output_value('name'), [u'Marta'])
|
||||||
|
|
||||||
def test_constructor_with_response(self):
|
def test_init_method_with_response(self):
|
||||||
l = TestItemLoader(response=self.response)
|
l = TestItemLoader(response=self.response)
|
||||||
self.assertTrue(l.selector)
|
self.assertTrue(l.selector)
|
||||||
|
|
||||||
l.add_xpath('name', '//div/text()')
|
l.add_xpath('name', '//div/text()')
|
||||||
self.assertEqual(l.get_output_value('name'), [u'Marta'])
|
self.assertEqual(l.get_output_value('name'), [u'Marta'])
|
||||||
|
|
||||||
def test_constructor_with_response_css(self):
|
def test_init_method_with_response_css(self):
|
||||||
l = TestItemLoader(response=self.response)
|
l = TestItemLoader(response=self.response)
|
||||||
self.assertTrue(l.selector)
|
self.assertTrue(l.selector)
|
||||||
|
|
||||||
|
@@ -22,13 +22,13 @@ class CustomItem(Item):
         return "name: %s" % self['name']
 
 
-class LoggingContribTest(unittest.TestCase):
+class LogFormatterTestCase(unittest.TestCase):
 
     def setUp(self):
         self.formatter = LogFormatter()
         self.spider = Spider('default')
 
-    def test_crawled(self):
+    def test_crawled_with_referer(self):
         req = Request("http://www.example.com")
         res = Response("http://www.example.com")
         logkws = self.formatter.crawled(req, res, self.spider)
@@ -36,6 +36,7 @@ class LoggingContribTest(unittest.TestCase):
         self.assertEqual(logline,
             "Crawled (200) <GET http://www.example.com> (referer: None)")
 
+    def test_crawled_without_referer(self):
         req = Request("http://www.example.com", headers={'referer': 'http://example.com'})
         res = Response("http://www.example.com", flags=['cached'])
         logkws = self.formatter.crawled(req, res, self.spider)
@@ -44,7 +45,7 @@ class LoggingContribTest(unittest.TestCase):
             "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")
 
     def test_flags_in_request(self):
-        req = Request("http://www.example.com", flags=['test','flag'])
+        req = Request("http://www.example.com", flags=['test', 'flag'])
         res = Response("http://www.example.com")
         logkws = self.formatter.crawled(req, res, self.spider)
         logline = logkws['msg'] % logkws['args']
@@ -61,6 +62,16 @@ class LoggingContribTest(unittest.TestCase):
         assert all(isinstance(x, str) for x in lines)
         self.assertEqual(lines, [u"Dropped: \u2018", '{}'])
 
+    def test_error(self):
+        # In practice, the complete traceback is shown by passing the
+        # 'exc_info' argument to the logging function
+        item = {'key': 'value'}
+        exception = Exception()
+        response = Response("http://www.example.com")
+        logkws = self.formatter.error(item, exception, response, self.spider)
+        logline = logkws['msg'] % logkws['args']
+        self.assertEqual(logline, u"'Error processing {'key': 'value'}'")
+
     def test_scraped(self):
         item = CustomItem()
         item['name'] = u'\xa3'
@@ -74,26 +85,46 @@ class LoggingContribTest(unittest.TestCase):
 
 class LogFormatterSubclass(LogFormatter):
     def crawled(self, request, response, spider):
-        kwargs = super(LogFormatterSubclass, self).crawled(
-            request, response, spider)
+        kwargs = super(LogFormatterSubclass, self).crawled(request, response, spider)
         CRAWLEDMSG = (
-            u"Crawled (%(status)s) %(request)s (referer: "
-            u"%(referer)s)%(flags)s"
+            u"Crawled (%(status)s) %(request)s (referer: %(referer)s) %(flags)s"
         )
+        log_args = kwargs['args']
+        log_args['flags'] = str(request.flags)
         return {
             'level': kwargs['level'],
             'msg': CRAWLEDMSG,
-            'args': kwargs['args']
+            'args': log_args,
         }
 
 
-class LogformatterSubclassTest(LoggingContribTest):
+class LogformatterSubclassTest(LogFormatterTestCase):
     def setUp(self):
         self.formatter = LogFormatterSubclass()
         self.spider = Spider('default')
 
+    def test_crawled_with_referer(self):
+        req = Request("http://www.example.com")
+        res = Response("http://www.example.com")
+        logkws = self.formatter.crawled(req, res, self.spider)
+        logline = logkws['msg'] % logkws['args']
+        self.assertEqual(logline,
+            "Crawled (200) <GET http://www.example.com> (referer: None) []")
+
+    def test_crawled_without_referer(self):
+        req = Request("http://www.example.com", headers={'referer': 'http://example.com'}, flags=['cached'])
+        res = Response("http://www.example.com")
+        logkws = self.formatter.crawled(req, res, self.spider)
+        logline = logkws['msg'] % logkws['args']
+        self.assertEqual(logline,
+            "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")
+
     def test_flags_in_request(self):
-        pass
+        req = Request("http://www.example.com", flags=['test', 'flag'])
+        res = Response("http://www.example.com")
+        logkws = self.formatter.crawled(req, res, self.spider)
+        logline = logkws['msg'] % logkws['args']
+        self.assertEqual(logline, "Crawled (200) <GET http://www.example.com> (referer: None) ['test', 'flag']")
 
 
 class SkipMessagesLogFormatter(LogFormatter):
@@ -41,12 +41,12 @@ class SpiderTest(unittest.TestCase):
         self.assertEqual(list(start_requests), [])
 
     def test_spider_args(self):
-        """Constructor arguments are assigned to spider attributes"""
+        """``__init__`` method arguments are assigned to spider attributes"""
         spider = self.spider_class('example.com', foo='bar')
         self.assertEqual(spider.foo, 'bar')
 
     def test_spider_without_name(self):
-        """Constructor arguments are assigned to spider attributes"""
+        """``__init__`` method arguments are assigned to spider attributes"""
         self.assertRaises(ValueError, self.spider_class)
         self.assertRaises(ValueError, self.spider_class, somearg='foo')
 
@@ -79,7 +79,7 @@ class BuildComponentListTest(unittest.TestCase):
         self.assertRaises(ValueError, build_component_list, {}, d, convert=lambda x: x)
         d = {'one': {'a': 'a', 'b': 2}}
         self.assertRaises(ValueError, build_component_list, {}, d, convert=lambda x: x)
-        d = {'one': 'lorem ipsum',}
+        d = {'one': 'lorem ipsum'}
         self.assertRaises(ValueError, build_component_list, {}, d, convert=lambda x: x)
 
 
@@ -21,7 +21,7 @@ class UtilsConsoleTestCase(unittest.TestCase):
         shell = get_shell_embed_func(['invalid'])
         self.assertEqual(shell, None)
 
-        shell = get_shell_embed_func(['invalid','python'])
+        shell = get_shell_embed_func(['invalid', 'python'])
         self.assertTrue(callable(shell))
         self.assertEqual(shell.__name__, '_embed_standard_shell')
 
@@ -1,8 +1,8 @@
-from collections.abc import Mapping, MutableMapping
 import copy
 import unittest
+from collections.abc import Mapping, MutableMapping
 
-from scrapy.utils.datatypes import CaselessDict, SequenceExclude
+from scrapy.utils.datatypes import CaselessDict, LocalCache, SequenceExclude
 
 
 __doctests__ = ['scrapy.utils.datatypes']
@@ -229,5 +229,31 @@ class SequenceExcludeTest(unittest.TestCase):
         for v in [-3, "test", 1.1]:
             self.assertNotIn(v, d)
+
+
+class LocalCacheTest(unittest.TestCase):
+
+    def test_cache_with_limit(self):
+        cache = LocalCache(limit=2)
+        cache['a'] = 1
+        cache['b'] = 2
+        cache['c'] = 3
+        self.assertEqual(len(cache), 2)
+        self.assertNotIn('a', cache)
+        self.assertIn('b', cache)
+        self.assertIn('c', cache)
+        self.assertEqual(cache['b'], 2)
+        self.assertEqual(cache['c'], 3)
+
+    def test_cache_without_limit(self):
+        maximum = 10**4
+        cache = LocalCache()
+        for x in range(maximum):
+            cache[str(x)] = x
+        self.assertEqual(len(cache), maximum)
+        for x in range(maximum):
+            self.assertIn(str(x), cache)
+            self.assertEqual(cache[str(x)], x)
+
 
 if __name__ == "__main__":
     unittest.main()
@@ -74,7 +74,7 @@ class UtilsMiscTestCase(unittest.TestCase):
         self.assertEqual(list(arg_to_iter(100)), [100])
         self.assertEqual(list(arg_to_iter(l for l in 'abc')), ['a', 'b', 'c'])
         self.assertEqual(list(arg_to_iter([1, 2, 3])), [1, 2, 3])
-        self.assertEqual(list(arg_to_iter({'a':1})), [{'a': 1}])
+        self.assertEqual(list(arg_to_iter({'a': 1})), [{'a': 1}])
         self.assertEqual(list(arg_to_iter(TestItem(name="john"))), [TestItem(name="john")])
 
     def test_create_instance(self):
@@ -205,10 +205,10 @@ class UtilsPythonTestCase(unittest.TestCase):
             self.assertEqual(get_func_args(operator.itemgetter(2)), [])
         else:
             self.assertEqual(
-                get_func_args(str.split, True), ['sep', 'maxsplit'])
-            self.assertEqual(get_func_args(" ".join, True), ['list'])
+                get_func_args(str.split, stripself=True), ['sep', 'maxsplit'])
+            self.assertEqual(get_func_args(" ".join, stripself=True), ['list'])
             self.assertEqual(
-                get_func_args(operator.itemgetter(2), True), ['obj'])
+                get_func_args(operator.itemgetter(2), stripself=True), ['obj'])
 
 
     def test_without_none_values(self):
13
tox.ini
@@ -21,7 +21,7 @@ passenv =
     GCS_TEST_FILE_URI
     GCS_PROJECT_ID
 commands =
-    py.test --cov=scrapy --cov-report= {posargs:scrapy tests}
+    py.test --cov=scrapy --cov-report= {posargs:--durations=10 docs scrapy tests}
 
 [testenv:py35]
 basepython = python3.5
@@ -60,7 +60,14 @@ basepython = python3.8
 [testenv:pypy3]
 basepython = pypy3
 commands =
-    py.test {posargs:scrapy tests}
+    py.test {posargs:--durations=10 docs scrapy tests}
 
+[testenv:security]
+basepython = python3.8
+deps =
+    bandit
+commands =
+    bandit -r -c .bandit.yml {posargs:scrapy}
+
 [testenv:flake8]
 basepython = python3.8
@@ -68,7 +75,7 @@ deps =
     {[testenv]deps}
     pytest-flake8
 commands =
-    py.test --flake8 {posargs:scrapy tests}
+    py.test --flake8 {posargs:docs scrapy tests}
 
 [docs]
 changedir = docs