Flexible severity of logging level when items are dropped (#6608)
parent: 1c1e83895c
commit: ca345a3b73
docs/topics/settings.rst
@@ -418,6 +418,38 @@ This setting also affects :setting:`DOWNLOAD_DELAY` and
 :ref:`topics-autothrottle`: if :setting:`CONCURRENT_REQUESTS_PER_IP`
 is non-zero, download delay is enforced per IP, not per domain.
 
+.. setting:: DEFAULT_DROPITEM_LOG_LEVEL
+
+DEFAULT_DROPITEM_LOG_LEVEL
+--------------------------
+
+Default: ``"WARNING"``
+
+Default :ref:`log level <levels>` of messages about dropped items.
+
+When an item is dropped by raising :exc:`scrapy.exceptions.DropItem` from the
+:meth:`process_item` method of an :ref:`item pipeline <topics-item-pipeline>`,
+a message is logged, and by default its log level is the one configured in this
+setting.
+
+You may specify this log level as an integer (e.g. ``20``), as a log level
+constant (e.g. ``logging.INFO``) or as a string with the name of a log level
+constant (e.g. ``"INFO"``).
+
+When writing an item pipeline, you can force a different log level by setting
+:attr:`scrapy.exceptions.DropItem.log_level` in your
+:exc:`scrapy.exceptions.DropItem` exception. For example:
+
+.. code-block:: python
+
+    from scrapy.exceptions import DropItem
+
+
+    class MyPipeline:
+        def process_item(self, item, spider):
+            if not item.get("price"):
+                raise DropItem("Missing price data", log_level="INFO")
+            return item
+
 .. setting:: DEFAULT_ITEM_CLASS
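In a project's ``settings.py``, any of the three documented forms can change the default; a minimal sketch (the comments spell out the equivalences, and the exact project module path is illustrative):

    # settings.py of a Scrapy project -- any one of these lines suffices
    DEFAULT_DROPITEM_LOG_LEVEL = "INFO"  # name of a log level constant
    # DEFAULT_DROPITEM_LOG_LEVEL = 20   # the same level as an integer
    # import logging
    # DEFAULT_DROPITEM_LOG_LEVEL = logging.INFO  # the same level as a constant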
scrapy/exceptions.py
@@ -5,6 +5,8 @@ These exceptions are documented in docs/topics/exceptions.rst. Please don't add
 new exceptions here without documenting them there.
 """
 
+from __future__ import annotations
+
 from typing import Any
 
 # Internal
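The ``from __future__ import annotations`` import is what lets the ``str | None`` annotation in the new ``DropItem.__init__`` below parse on Python versions older than 3.10.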
@@ -58,6 +60,10 @@ class StopDownload(Exception):
 class DropItem(Exception):
     """Drop item from the item pipeline"""
 
+    def __init__(self, message: str, log_level: str | None = None):
+        super().__init__(message)
+        self.log_level = log_level
+
 
 class NotSupported(Exception):
     """Indicates a feature or method is not supported"""
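Because the new ``log_level`` argument is optional and defaults to ``None``, existing ``raise DropItem(...)`` call sites keep working unchanged. A minimal sketch of both call styles:

    from scrapy.exceptions import DropItem

    # Old style: log_level stays None, so DEFAULT_DROPITEM_LOG_LEVEL decides.
    DropItem("duplicate item")

    # New style: this exception logs at DEBUG regardless of the setting.
    DropItem("duplicate item", log_level="DEBUG")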
scrapy/logformatter.py
@@ -120,8 +120,12 @@ class LogFormatter:
         spider: Spider,
     ) -> LogFormatterResult:
         """Logs a message when an item is dropped while it is passing through the item pipeline."""
+        if (level := getattr(exception, "log_level", None)) is None:
+            level = spider.crawler.settings["DEFAULT_DROPITEM_LOG_LEVEL"]
+        if isinstance(level, str):
+            level = getattr(logging, level)
         return {
-            "level": logging.WARNING,
+            "level": level,
             "msg": DROPPEDMSG,
             "args": {
                 "exception": exception,
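The lookup order in ``LogFormatter.dropped()`` is: the exception's ``log_level`` attribute if set, otherwise the ``DEFAULT_DROPITEM_LOG_LEVEL`` setting, with string names translated to ``logging`` integers. A standalone sketch of that resolution (the function name ``resolve_drop_level`` is illustrative, not part of Scrapy):

    import logging

    def resolve_drop_level(exception, settings):
        # A per-exception override wins over the project-wide setting.
        level = getattr(exception, "log_level", None)
        if level is None:
            level = settings["DEFAULT_DROPITEM_LOG_LEVEL"]
        if isinstance(level, str):
            level = getattr(logging, level)  # e.g. "INFO" -> 20
        return level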
scrapy/settings/default_settings.py
@@ -49,6 +49,8 @@ CONCURRENT_REQUESTS_PER_IP = 0
 COOKIES_ENABLED = True
 COOKIES_DEBUG = False
 
+DEFAULT_DROPITEM_LOG_LEVEL = "WARNING"
+
 DEFAULT_ITEM_CLASS = "scrapy.item.Item"
 
 DEFAULT_REQUEST_HEADERS = {
tests/test_logformatter.py
@@ -1,5 +1,7 @@
+import logging
 import unittest
 
+import pytest
 from testfixtures import LogCapture
 from twisted.internet import defer
 from twisted.python.failure import Failure
@@ -26,6 +28,7 @@ class LogFormatterTestCase(unittest.TestCase):
     def setUp(self):
         self.formatter = LogFormatter()
         self.spider = Spider("default")
+        self.spider.crawler = get_crawler()
 
     def test_crawled_with_referer(self):
         req = Request("http://www.example.com")
@@ -68,6 +71,62 @@ class LogFormatterTestCase(unittest.TestCase):
         assert all(isinstance(x, str) for x in lines)
         self.assertEqual(lines, ["Dropped: \u2018", "{}"])
 
+    def test_dropitem_default_log_level(self):
+        item = {}
+        exception = DropItem("Test drop")
+        response = Response("http://www.example.com")
+        spider = Spider("foo")
+        spider.crawler = get_crawler(Spider)
+
+        logkws = self.formatter.dropped(item, exception, response, spider)
+        self.assertEqual(logkws["level"], logging.WARNING)
+
+        spider.crawler.settings.frozen = False
+        spider.crawler.settings["DEFAULT_DROPITEM_LOG_LEVEL"] = logging.INFO
+        spider.crawler.settings.frozen = True
+        logkws = self.formatter.dropped(item, exception, response, spider)
+        self.assertEqual(logkws["level"], logging.INFO)
+
+        spider.crawler.settings.frozen = False
+        spider.crawler.settings["DEFAULT_DROPITEM_LOG_LEVEL"] = "INFO"
+        spider.crawler.settings.frozen = True
+        logkws = self.formatter.dropped(item, exception, response, spider)
+        self.assertEqual(logkws["level"], logging.INFO)
+
+        spider.crawler.settings.frozen = False
+        spider.crawler.settings["DEFAULT_DROPITEM_LOG_LEVEL"] = 10
+        spider.crawler.settings.frozen = True
+        logkws = self.formatter.dropped(item, exception, response, spider)
+        self.assertEqual(logkws["level"], logging.DEBUG)
+
+        spider.crawler.settings.frozen = False
+        spider.crawler.settings["DEFAULT_DROPITEM_LOG_LEVEL"] = 0
+        spider.crawler.settings.frozen = True
+        logkws = self.formatter.dropped(item, exception, response, spider)
+        self.assertEqual(logkws["level"], logging.NOTSET)
+
+        unsupported_value = object()
+        spider.crawler.settings.frozen = False
+        spider.crawler.settings["DEFAULT_DROPITEM_LOG_LEVEL"] = unsupported_value
+        spider.crawler.settings.frozen = True
+        logkws = self.formatter.dropped(item, exception, response, spider)
+        self.assertEqual(logkws["level"], unsupported_value)
+
+        with pytest.raises(TypeError):
+            logging.log(logkws["level"], "message")
+
+    def test_dropitem_custom_log_level(self):
+        item = {}
+        response = Response("http://www.example.com")
+
+        exception = DropItem("Test drop", log_level="INFO")
+        logkws = self.formatter.dropped(item, exception, response, self.spider)
+        self.assertEqual(logkws["level"], logging.INFO)
+
+        exception = DropItem("Test drop", log_level="ERROR")
+        logkws = self.formatter.dropped(item, exception, response, self.spider)
+        self.assertEqual(logkws["level"], logging.ERROR)
+
     def test_item_error(self):
         # In practice, the complete traceback is shown by passing the
         # 'exc_info' argument to the logging function
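The tests toggle ``settings.frozen`` directly because crawler settings are frozen once the crawler is created and raise an error on normal assignment. A small illustrative helper capturing that pattern (``set_frozen_setting`` is ours, not part of the test suite):

    def set_frozen_setting(settings, key, value):
        # Temporarily unfreeze to tweak a single setting, then refreeze.
        settings.frozen = False
        settings[key] = value
        settings.frozen = True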
@@ -145,6 +204,7 @@ class LogformatterSubclassTest(LogFormatterTestCase):
     def setUp(self):
         self.formatter = LogFormatterSubclass()
         self.spider = Spider("default")
+        self.spider.crawler = get_crawler(Spider)
 
     def test_crawled_with_referer(self):
         req = Request("http://www.example.com")