From 84fe4011b0063dae1a8efbcb563e772d3b9fce09 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Wed, 23 Oct 2019 20:39:53 +0800 Subject: [PATCH 1/6] update docs of scrapy.loader.ItemLoader.item --- docs/topics/loaders.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/topics/loaders.rst b/docs/topics/loaders.rst index 1c2f1da4d..4bd564014 100644 --- a/docs/topics/loaders.rst +++ b/docs/topics/loaders.rst @@ -485,6 +485,8 @@ ItemLoader objects .. attribute:: item The :class:`~scrapy.item.Item` object being parsed by this Item Loader. + This is mostly used as a property so when attempting to override this + value, you may want to check out :attr:`default_item_class` first. .. attribute:: context From b73fc99b60ed83be403e9570e84f5267d35dcc9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Tue, 26 Nov 2019 10:31:55 +0100 Subject: [PATCH 2/6] Use InterSphinx for coverage links --- docs/conf.py | 1 + docs/contributing.rst | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index eab366efd..914d1d05f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -275,6 +275,7 @@ coverage_ignore_pyobjects = [ # ------------------------------------- intersphinx_mapping = { + 'coverage': ('https://coverage.readthedocs.io/en/stable', None), 'pytest': ('https://docs.pytest.org/en/latest', None), 'python': ('https://docs.python.org/3', None), 'sphinx': ('https://www.sphinx-doc.org/en/master', None), diff --git a/docs/contributing.rst b/docs/contributing.rst index 81bb50a77..234c4bcee 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -243,14 +243,13 @@ the Python 3.6 :doc:`tox ` environment using all your CPU cores:: tox -e py36 -- scrapy tests -n auto -To see coverage report install `coverage`_ (``pip install coverage``) and run: +To see coverage report install :doc:`coverage ` +(``pip install coverage``) and run: ``coverage report`` see output of ``coverage --help`` for more options like 
html or xml report. -.. _coverage: https://pypi.python.org/pypi/coverage - Writing tests ------------- From d1cdfb47013330b0391a8db3b6b812697ee64b6a Mon Sep 17 00:00:00 2001 From: Grammy Jiang <719388+grammy-jiang@users.noreply.github.com> Date: Fri, 29 Nov 2019 19:13:57 +1100 Subject: [PATCH 3/6] Use pprint.pformat on overridden settings (#4199) Keeps consistency with scrapy.middleware --- scrapy/crawler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapy/crawler.py b/scrapy/crawler.py index f8c80880a..19b61dc7e 100644 --- a/scrapy/crawler.py +++ b/scrapy/crawler.py @@ -1,3 +1,4 @@ +import pprint import six import signal import logging @@ -45,7 +46,8 @@ class Crawler(object): logging.root.addHandler(handler) d = dict(overridden_settings(self.settings)) - logger.info("Overridden settings: %(settings)r", {'settings': d}) + logger.info("Overridden settings:\n%(settings)s", + {'settings': pprint.pformat(d)}) if get_scrapy_root_handler() is not None: # scrapy root handler already installed: update it with new settings From 5d8d4bb7d7d998ae2324c4995bafaafdef752572 Mon Sep 17 00:00:00 2001 From: Grammy Jiang <719388+grammy-jiang@users.noreply.github.com> Date: Thu, 5 Dec 2019 00:22:10 +1100 Subject: [PATCH 4/6] Re-arrange the imports in the httpproxy module (#4210) This commit re-arranges the imports in the httpproxy module to follow pep8 --- scrapy/downloadermiddlewares/httpproxy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapy/downloadermiddlewares/httpproxy.py b/scrapy/downloadermiddlewares/httpproxy.py index 2212d9688..5e4542b6c 100644 --- a/scrapy/downloadermiddlewares/httpproxy.py +++ b/scrapy/downloadermiddlewares/httpproxy.py @@ -1,7 +1,8 @@ import base64 +from urllib.request import _parse_proxy + from six.moves.urllib.parse import unquote, urlunparse from six.moves.urllib.request import getproxies, proxy_bypass -from urllib.request import _parse_proxy from scrapy.exceptions import NotConfigured from 
scrapy.utils.httpobj import urlparse_cached From 702333478d072c3c043c64ec7ad3997befb87943 Mon Sep 17 00:00:00 2001 From: Grammy Jiang <719388+grammy-jiang@users.noreply.github.com> Date: Thu, 5 Dec 2019 00:23:28 +1100 Subject: [PATCH 5/6] Re-arrange the imports in httpcache module (#4209) This commit re-arranges the imports in the httpcache module to follow pep8 --- scrapy/downloadermiddlewares/httpcache.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/scrapy/downloadermiddlewares/httpcache.py b/scrapy/downloadermiddlewares/httpcache.py index 495b103d1..4e06f8236 100644 --- a/scrapy/downloadermiddlewares/httpcache.py +++ b/scrapy/downloadermiddlewares/httpcache.py @@ -1,11 +1,19 @@ from email.utils import formatdate + from twisted.internet import defer -from twisted.internet.error import TimeoutError, DNSLookupError, \ - ConnectionRefusedError, ConnectionDone, ConnectError, \ - ConnectionLost, TCPTimedOutError +from twisted.internet.error import ( + ConnectError, + ConnectionDone, + ConnectionLost, + ConnectionRefusedError, + DNSLookupError, + TCPTimedOutError, + TimeoutError, +) from twisted.web.client import ResponseFailed + from scrapy import signals -from scrapy.exceptions import NotConfigured, IgnoreRequest +from scrapy.exceptions import IgnoreRequest, NotConfigured from scrapy.utils.misc import load_object From 74627033c4a1701f3e197216f9f2801d497f5535 Mon Sep 17 00:00:00 2001 From: Grammy Jiang <719388+grammy-jiang@users.noreply.github.com> Date: Thu, 5 Dec 2019 00:24:14 +1100 Subject: [PATCH 6/6] Remove the unused import and re-arrange the imports (#4208) This commit removes an unused import and re-arranges the imports in the cookies module --- scrapy/downloadermiddlewares/cookies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapy/downloadermiddlewares/cookies.py b/scrapy/downloadermiddlewares/cookies.py index 0d2b9900c..aeb7578b8 100644 --- a/scrapy/downloadermiddlewares/cookies.py +++ 
b/scrapy/downloadermiddlewares/cookies.py @@ -1,8 +1,8 @@ -import os -import six import logging from collections import defaultdict +import six + from scrapy.exceptions import NotConfigured from scrapy.http import Response from scrapy.http.cookies import CookieJar