From 3a263280bad53a26490381f293a454be6c25ea30 Mon Sep 17 00:00:00 2001
From: "Kian-Meng, Ang"
Date: Mon, 11 Oct 2021 22:32:42 +0800
Subject: [PATCH] Fix typos

---
 docs/news.rst                           | 10 +++++-----
 docs/topics/settings.rst                |  4 ++--
 docs/topics/shell.rst                   |  2 +-
 docs/topics/spiders.rst                 |  4 ++--
 docs/versioning.rst                     |  2 +-
 extras/qpsclient.py                     |  2 +-
 scrapy/downloadermiddlewares/retry.py   |  2 +-
 scrapy/exporters.py                     |  2 +-
 scrapy/linkextractors/lxmlhtml.py       |  2 +-
 scrapy/pipelines/files.py               |  8 ++++----
 scrapy/spiders/feed.py                  |  4 ++--
 scrapy/utils/console.py                 |  2 +-
 scrapy/utils/datatypes.py               |  2 +-
 scrapy/utils/defer.py                   |  2 +-
 scrapy/utils/request.py                 |  4 ++--
 sep/sep-001.rst                         |  2 +-
 sep/sep-005.rst                         |  2 +-
 sep/sep-014.rst                         |  2 +-
 sep/sep-021.rst                         |  2 +-
 tests/test_http_response.py             |  4 ++--
 tests/test_request_attribute_binding.py |  8 ++++----
 tests/test_utils_defer.py               |  4 ++--
 tests/test_utils_template.py            |  2 +-
 23 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/docs/news.rst b/docs/news.rst
index 5e590f027..509366c17 100644
--- a/docs/news.rst
+++ b/docs/news.rst
@@ -1830,7 +1830,7 @@ New features
 * A new scheduler priority queue, ``scrapy.pqueues.DownloaderAwarePriorityQueue``,
   may be :ref:`enabled ` for a significant
-  scheduling improvement on crawls targetting multiple web domains, at the
+  scheduling improvement on crawls targeting multiple web domains, at the
   cost of no :setting:`CONCURRENT_REQUESTS_PER_IP` support (:issue:`3520`)

 * A new :attr:`Request.cb_kwargs ` attribute
@@ -2868,7 +2868,7 @@ Bug fixes
 - Fix for selected callbacks when using ``CrawlSpider`` with :command:`scrapy parse ` (:issue:`2225`).
 - Fix for invalid JSON and XML files when spider yields no items (:issue:`872`).
-- Implement ``flush()`` fpr ``StreamLogger`` avoiding a warning in logs (:issue:`2125`).
+- Implement ``flush()`` for ``StreamLogger`` avoiding a warning in logs (:issue:`2125`).

 Refactoring
 ~~~~~~~~~~~
@@ -3731,7 +3731,7 @@ Scrapy 0.24.3 (2014-08-09)
 - adding some xpath tips to selectors docs (:commit:`2d103e0`)
 - fix tests to account for https://github.com/scrapy/w3lib/pull/23 (:commit:`f8d366a`)
 - get_func_args maximum recursion fix #728 (:commit:`81344ea`)
-- Updated input/ouput processor example according to #560. (:commit:`f7c4ea8`)
+- Updated input/output processor example according to #560. (:commit:`f7c4ea8`)
 - Fixed Python syntax in tutorial. (:commit:`db59ed9`)
 - Add test case for tunneling proxy (:commit:`f090260`)
 - Bugfix for leaking Proxy-Authorization header to remote host when using tunneling (:commit:`d8793af`)
@@ -4393,7 +4393,7 @@ Scrapyd changes
 ~~~~~~~~~~~~~~~

 - Scrapyd now uses one process per spider
-- It stores one log file per spider run, and rotate them keeping the lastest 5 logs per spider (by default)
+- It stores one log file per spider run, and rotate them keeping the latest 5 logs per spider (by default)
 - A minimal web ui was added, available at http://localhost:6800 by default
 - There is now a ``scrapy server`` command to start a Scrapyd server of the current project
@@ -4429,7 +4429,7 @@ New features and improvements
 - Added two new methods to item pipeline open_spider(), close_spider() with deferred support (#195)
 - Support for overriding default request headers per spider (#181)
 - Replaced default Spider Manager with one with similar functionality but not depending on Twisted Plugins (#186)
-- Splitted Debian package into two packages - the library and the service (#187)
+- Split Debian package into two packages - the library and the service (#187)
 - Scrapy log refactoring (#188)
 - New extension for keeping persistent spider contexts among different runs (#203)
 - Added ``dont_redirect`` request.meta key for avoiding redirects (#233)
diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst
index 2ab2020fa..19a549a02 100644
--- a/docs/topics/settings.rst
+++ b/docs/topics/settings.rst
@@ -1566,7 +1566,7 @@
 If a reactor is already installed,
 :meth:`CrawlerRunner.__init__ ` raises
 :exc:`Exception` if the installed reactor does not match the
-:setting:`TWISTED_REACTOR` setting; therfore, having top-level
+:setting:`TWISTED_REACTOR` setting; therefore, having top-level
 :mod:`~twisted.internet.reactor` imports in project files and imported
 third-party libraries will make Scrapy raise :exc:`Exception` when it checks
 which reactor is installed.
@@ -1658,7 +1658,7 @@ Default: ``"Scrapy/VERSION (+https://scrapy.org)"``
 The default User-Agent to use when crawling, unless overridden. This user agent is
 also used by :class:`~scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware`
 if :setting:`ROBOTSTXT_USER_AGENT` setting is ``None`` and
-there is no overridding User-Agent header specified for the request.
+there is no overriding User-Agent header specified for the request.

 Settings documented elsewhere:
diff --git a/docs/topics/shell.rst b/docs/topics/shell.rst
index 8c90a506c..007e9fc2f 100644
--- a/docs/topics/shell.rst
+++ b/docs/topics/shell.rst
@@ -99,7 +99,7 @@ Available Shortcuts
 shortcuts

 - ``fetch(url[, redirect=True])`` - fetch a new response from the given URL
-  and update all related objects accordingly. You can optionaly ask for HTTP
+  and update all related objects accordingly. You can optionally ask for HTTP
   3xx redirections to not be followed by passing ``redirect=False``

 - ``fetch(request)`` - fetch a new response from the given request and update
diff --git a/docs/topics/spiders.rst b/docs/topics/spiders.rst
index 4d3d32941..99e74233a 100644
--- a/docs/topics/spiders.rst
+++ b/docs/topics/spiders.rst
@@ -372,7 +372,7 @@ CrawlSpider
     described below. If multiple rules match the same link, the first one
     will be used, according to the order they're defined in this attribute.

-    This spider also exposes an overrideable method:
+    This spider also exposes an overridable method:

     .. method:: parse_start_url(response, **kwargs)
@@ -534,7 +534,7 @@ XMLFeedSpider
         itertag = 'n:url'
         # ...

-    Apart from these new attributes, this spider has the following overrideable
+    Apart from these new attributes, this spider has the following overridable
     methods too:

     .. method:: adapt_response(response)
diff --git a/docs/versioning.rst b/docs/versioning.rst
index 57643ea9a..9d02757b0 100644
--- a/docs/versioning.rst
+++ b/docs/versioning.rst
@@ -13,7 +13,7 @@ There are 3 numbers in a Scrapy version: *A.B.C*
   large changes.
 * *B* is the release number. This will include many changes including features
   and things that possibly break backward compatibility, although we strive to
-  keep theses cases at a minimum.
+  keep these cases at a minimum.
 * *C* is the bugfix release number.

 Backward-incompatibilities are explicitly mentioned in the :ref:`release notes `,
diff --git a/extras/qpsclient.py b/extras/qpsclient.py
index f9fb70342..28703650d 100644
--- a/extras/qpsclient.py
+++ b/extras/qpsclient.py
@@ -1,5 +1,5 @@
 """
-A spider that generate light requests to meassure QPS troughput
+A spider that generate light requests to meassure QPS throughput

 usage:
diff --git a/scrapy/downloadermiddlewares/retry.py b/scrapy/downloadermiddlewares/retry.py
index f1fdc3858..c6cc7c56d 100644
--- a/scrapy/downloadermiddlewares/retry.py
+++ b/scrapy/downloadermiddlewares/retry.py
@@ -2,7 +2,7 @@
 An extension to retry failed requests that are potentially caused by temporary
 problems such as a connection timeout or HTTP 500 error.

-You can change the behaviour of this middleware by modifing the scraping settings:
+You can change the behaviour of this middleware by modifying the scraping settings:
 RETRY_TIMES - how many times to retry a failed page
 RETRY_HTTP_CODES - which HTTP response codes to retry
diff --git a/scrapy/exporters.py b/scrapy/exporters.py
index fb4b565cf..36cca2d05 100644
--- a/scrapy/exporters.py
+++ b/scrapy/exporters.py
@@ -30,7 +30,7 @@ class BaseItemExporter:
         self._configure(kwargs, dont_fail=dont_fail)

     def _configure(self, options, dont_fail=False):
-        """Configure the exporter by poping options from the ``options`` dict.
+        """Configure the exporter by popping options from the ``options`` dict.
         If dont_fail is set, it won't raise an exception on unexpected options
         (useful for using with keyword arguments in subclasses ``__init__`` methods)
         """
diff --git a/scrapy/linkextractors/lxmlhtml.py b/scrapy/linkextractors/lxmlhtml.py
index e941c4321..b5d2585a8 100644
--- a/scrapy/linkextractors/lxmlhtml.py
+++ b/scrapy/linkextractors/lxmlhtml.py
@@ -88,7 +88,7 @@ class LxmlParserLinkExtractor:
     def _process_links(self, links):
         """ Normalize and filter extracted links
-        The subclass should override it if neccessary
+        The subclass should override it if necessary
         """
         return self._deduplicate_if_needed(links)
diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py
index 8766ef66f..5c52c6c28 100644
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@@ -85,7 +85,7 @@ class S3FilesStore:
     AWS_USE_SSL = None
     AWS_VERIFY = None

-    POLICY = 'private' # Overriden from settings.FILES_STORE_S3_ACL in FilesPipeline.from_settings
+    POLICY = 'private' # Overridden from settings.FILES_STORE_S3_ACL in FilesPipeline.from_settings
     HEADERS = {
         'Cache-Control': 'max-age=172800',
     }
@@ -142,7 +142,7 @@ class S3FilesStore:
             **extra)

     def _headers_to_botocore_kwargs(self, headers):
-        """ Convert headers to botocore keyword agruments.
+        """ Convert headers to botocore keyword arguments.
         """
         # This is required while we need to support both boto and botocore.
         mapping = CaselessDict({
@@ -190,7 +190,7 @@ class GCSFilesStore:
     CACHE_CONTROL = 'max-age=172800'

     # The bucket's default object ACL will be applied to the object.
-    # Overriden from settings.FILES_STORE_GCS_ACL in FilesPipeline.from_settings.
+    # Overridden from settings.FILES_STORE_GCS_ACL in FilesPipeline.from_settings.
     POLICY = None

     def __init__(self, uri):
@@ -291,7 +291,7 @@ class FilesPipeline(MediaPipeline):
     """Abstract pipeline that implement the file downloading

     This pipeline tries to minimize network transfers and file processing,
-    doing stat of the files and determining if file is new, uptodate or
+    doing stat of the files and determining if file is new, up-to-date or
     expired.

     ``new`` files are those that pipeline never processed and needs to be
diff --git a/scrapy/spiders/feed.py b/scrapy/spiders/feed.py
index 6ed17e4dd..bef2d6b24 100644
--- a/scrapy/spiders/feed.py
+++ b/scrapy/spiders/feed.py
@@ -43,7 +43,7 @@ class XMLFeedSpider(Spider):
         return response

     def parse_node(self, response, selector):
-        """This method must be overriden with your custom spider functionality"""
+        """This method must be overridden with your custom spider functionality"""
         if hasattr(self, 'parse_item'): # backward compatibility
             return self.parse_item(response, selector)
         raise NotImplementedError
@@ -113,7 +113,7 @@ class CSVFeedSpider(Spider):
         return response

     def parse_row(self, response, row):
-        """This method must be overriden with your custom spider functionality"""
+        """This method must be overridden with your custom spider functionality"""
         raise NotImplementedError

     def parse_rows(self, response):
diff --git a/scrapy/utils/console.py b/scrapy/utils/console.py
index 133261fd7..1bc0bd45f 100644
--- a/scrapy/utils/console.py
+++ b/scrapy/utils/console.py
@@ -14,7 +14,7 @@ def _embed_ipython_shell(namespace={}, banner=''):
     @wraps(_embed_ipython_shell)
     def wrapper(namespace=namespace, banner=''):
         config = load_default_config()
-        # Always use .instace() to ensure _instance propagation to all parents
+        # Always use .instance() to ensure _instance propagation to all parents
         # this is needed for completion works well for new imports
         # and clear the instance to always have the fresh env
         # on repeated breaks like with inspect_response()
diff --git a/scrapy/utils/datatypes.py b/scrapy/utils/datatypes.py
index e31284a7f..47df8a717 100644
--- a/scrapy/utils/datatypes.py
+++ b/scrapy/utils/datatypes.py
@@ -41,7 +41,7 @@ class CaselessDict(dict):
         return key.lower()

     def normvalue(self, value):
-        """Method to normalize values prior to be setted"""
+        """Method to normalize values prior to be set"""
         return value

     def get(self, key, def_val=None):
diff --git a/scrapy/utils/defer.py b/scrapy/utils/defer.py
index b317c12a3..b02bfdccb 100644
--- a/scrapy/utils/defer.py
+++ b/scrapy/utils/defer.py
@@ -34,7 +34,7 @@ def defer_succeed(result) -> Deferred:
     """Same as twisted.internet.defer.succeed but delay calling callback until
     next reactor loop

-    It delays by 100ms so reactor has a chance to go trough readers and writers
+    It delays by 100ms so reactor has a chance to go through readers and writers
     before attending pending delayed calls, so do not set delay to zero.
     """
     from twisted.internet import reactor
diff --git a/scrapy/utils/request.py b/scrapy/utils/request.py
index 57dcc5f2c..70ef3ba2b 100644
--- a/scrapy/utils/request.py
+++ b/scrapy/utils/request.py
@@ -48,7 +48,7 @@ def request_fingerprint(
     the fingerprint.

     For this reason, request headers are ignored by default when calculating
-    the fingeprint. If you want to include specific headers use the
+    the fingerprint. If you want to include specific headers use the
     include_headers argument, which is a list of Request headers to include.

     Also, servers usually ignore fragments in urls when handling requests,
@@ -78,7 +78,7 @@ def request_fingerprint(

 def request_authenticate(request: Request, username: str, password: str) -> None:
-    """Autenticate the given request (in place) using the HTTP basic access
+    """Authenticate the given request (in place) using the HTTP basic access
     authentication mechanism (RFC 2617) and the given username and password
     """
     request.headers['Authorization'] = basic_auth_header(username, password)
diff --git a/sep/sep-001.rst b/sep/sep-001.rst
index 00226283f..f704e113f 100644
--- a/sep/sep-001.rst
+++ b/sep/sep-001.rst
@@ -260,7 +260,7 @@ ItemForm
     ia['width'] = x.x('//p[@class="width"]')
     ia['volume'] = x.x('//p[@class="volume"]')

-    # another example passing parametes on instance
+    # another example passing parameters on instance
     ia = NewsForm(response, encoding='utf-8')
     ia['name'] = x.x('//p[@class="name"]')
diff --git a/sep/sep-005.rst b/sep/sep-005.rst
index e795838e4..08ed367b3 100644
--- a/sep/sep-005.rst
+++ b/sep/sep-005.rst
@@ -107,7 +107,7 @@ gUsing default_builder
 This will use default_builder as the builder for every field in the item
 class.

-As a reducer is not set reducers will be set based on Item Field classess.
+As a reducer is not set reducers will be set based on Item Field classes.

 gReset default_builder for a field
 ==================================
diff --git a/sep/sep-014.rst b/sep/sep-014.rst
index 8ca81824d..0859e3f7c 100644
--- a/sep/sep-014.rst
+++ b/sep/sep-014.rst
@@ -64,7 +64,7 @@ Request Processors
 takes requests objects and can perform any action to them, like filtering or
 modifying on the fly.

 The current ``LinkExtractor`` had integrated link processing, like
-canonicalize. Request Processors can be reutilized and applied in serie.
+canonicalize. Request Processors can be reutilized and applied in series.

 Request Generator
 -----------------
diff --git a/sep/sep-021.rst b/sep/sep-021.rst
index 372429791..c1ec16f7f 100644
--- a/sep/sep-021.rst
+++ b/sep/sep-021.rst
@@ -22,7 +22,7 @@ Instead, the hooks are spread over:
 * Downloader handlers (DOWNLOADER_HANDLERS)
 * Item pipelines (ITEM_PIPELINES)
 * Feed exporters and storages (FEED_EXPORTERS, FEED_STORAGES)
-* Overrideable components (DUPEFILTER_CLASS, STATS_CLASS, SCHEDULER, SPIDER_MANAGER_CLASS, ITEM_PROCESSOR, etc)
+* Overridable components (DUPEFILTER_CLASS, STATS_CLASS, SCHEDULER, SPIDER_MANAGER_CLASS, ITEM_PROCESSOR, etc)
 * Generic extensions (EXTENSIONS)
 * CLI commands (COMMANDS_MODULE)
diff --git a/tests/test_http_response.py b/tests/test_http_response.py
index c376a46cd..0ec5257e1 100644
--- a/tests/test_http_response.py
+++ b/tests/test_http_response.py
@@ -19,7 +19,7 @@ class BaseResponseTest(unittest.TestCase):
     response_class = Response

     def test_init(self):
-        # Response requires url in the consturctor
+        # Response requires url in the constructor
         self.assertRaises(Exception, self.response_class)
         self.assertTrue(isinstance(self.response_class('http://example.com/'), self.response_class))
         self.assertRaises(TypeError, self.response_class, b"http://example.com")
@@ -392,7 +392,7 @@ class TextResponseTest(BaseResponseTest):
     def test_declared_encoding_invalid(self):
         """Check that unknown declared encodings are ignored"""
         r = self.response_class("http://www.example.com",
-                                headers={"Content-type": ["text/html; charset=UKNOWN"]},
+                                headers={"Content-type": ["text/html; charset=UNKNOWN"]},
                                 body=b"\xc2\xa3")
         self.assertEqual(r._declared_encoding(), None)
         self._assert_response_values(r, 'utf-8', "\xa3")
diff --git a/tests/test_request_attribute_binding.py b/tests/test_request_attribute_binding.py
index 00c532c41..25d9657d5 100644
--- a/tests/test_request_attribute_binding.py
+++ b/tests/test_request_attribute_binding.py
@@ -106,9 +106,9 @@ class CrawlTestCase(TestCase):
         """
         Downloader middleware which returns a response with an specific 'request' attribute.

-        * The spider callback should receive the overriden response.request
-        * Handlers listening to the response_received signal should receive the overriden response.request
-        * The "crawled" log message should show the overriden response.request
+        * The spider callback should receive the overridden response.request
+        * Handlers listening to the response_received signal should receive the overridden response.request
+        * The "crawled" log message should show the overridden response.request
         """
         signal_params = {}
@@ -144,7 +144,7 @@ class CrawlTestCase(TestCase):
         An exception is raised but caught by the next middleware, which
         returns a Response with a specific 'request' attribute.

-        The spider callback should receive the overriden response.request
+        The spider callback should receive the overridden response.request
         """
         url = self.mockserver.url("/status?n=200")
         runner = CrawlerRunner(settings={
diff --git a/tests/test_utils_defer.py b/tests/test_utils_defer.py
index 7a5f458c7..032dbc8c5 100644
--- a/tests/test_utils_defer.py
+++ b/tests/test_utils_defer.py
@@ -23,7 +23,7 @@ class MustbeDeferredTest(unittest.TestCase):
         dfd = mustbe_deferred(_append, 1)
         dfd.addCallback(self.assertEqual, [1, 2]) # it is [1] with maybeDeferred
-        steps.append(2) # add another value, that should be catched by assertEqual
+        steps.append(2) # add another value, that should be caught by assertEqual
         return dfd

     def test_unfired_deferred(self):
@@ -37,7 +37,7 @@ class MustbeDeferredTest(unittest.TestCase):
         dfd = mustbe_deferred(_append, 1)
         dfd.addCallback(self.assertEqual, [1, 2]) # it is [1] with maybeDeferred
-        steps.append(2) # add another value, that should be catched by assertEqual
+        steps.append(2) # add another value, that should be caught by assertEqual
         return dfd
diff --git a/tests/test_utils_template.py b/tests/test_utils_template.py
index 5ff2e41ef..1d5e63363 100644
--- a/tests/test_utils_template.py
+++ b/tests/test_utils_template.py
@@ -36,7 +36,7 @@ class UtilsRenderTemplateFileTestCase(unittest.TestCase):
         self.assertEqual(result.read().decode('utf8'), rendered)

         os.remove(render_path)
-        assert not os.path.exists(render_path) # Failure of test iself
+        assert not os.path.exists(render_path) # Failure of test itself

 if '__main__' == __name__: