mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 23:43:59 +00:00
fix Referer logging
This commit is contained in:
parent
7da769feb2
commit
642af00bb7
@ -16,6 +16,7 @@ from scrapy import signals
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.item import BaseItem
|
||||
from scrapy.core.spidermw import SpiderMiddlewareManager
|
||||
from scrapy.utils.request import referer_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -150,10 +151,9 @@ class Scraper(object):
|
||||
if isinstance(exc, CloseSpider):
|
||||
self.crawler.engine.close_spider(spider, exc.reason or 'cancelled')
|
||||
return
|
||||
referer = request.headers.get('Referer')
|
||||
logger.error(
|
||||
"Spider error processing %(request)s (referer: %(referer)s)",
|
||||
{'request': request, 'referer': referer},
|
||||
{'request': request, 'referer': referer_str(request)},
|
||||
exc_info=failure_to_exc_info(_failure),
|
||||
extra={'spider': spider}
|
||||
)
|
||||
|
@ -3,6 +3,7 @@ import logging
|
||||
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy.utils.request import referer_str
|
||||
|
||||
SCRAPEDMSG = u"Scraped from %(src)s" + os.linesep + "%(item)s"
|
||||
DROPPEDMSG = u"Dropped: %(exception)s" + os.linesep + "%(item)s"
|
||||
@ -38,13 +39,16 @@ class LogFormatter(object):
|
||||
'args': {
|
||||
'status': response.status,
|
||||
'request': request,
|
||||
'referer': request.headers.get('Referer'),
|
||||
'referer': referer_str(request),
|
||||
'flags': flags,
|
||||
}
|
||||
}
|
||||
|
||||
def scraped(self, item, response, spider):
|
||||
src = response.getErrorMessage() if isinstance(response, Failure) else response
|
||||
if isinstance(response, Failure):
|
||||
src = response.getErrorMessage()
|
||||
else:
|
||||
src = response
|
||||
return {
|
||||
'level': logging.DEBUG,
|
||||
'msg': SCRAPEDMSG,
|
||||
|
@ -26,7 +26,8 @@ from scrapy.exceptions import NotConfigured, IgnoreRequest
|
||||
from scrapy.http import Request
|
||||
from scrapy.utils.misc import md5sum
|
||||
from scrapy.utils.log import failure_to_exc_info
|
||||
from scrapy.utils.python import to_bytes, to_native_str
|
||||
from scrapy.utils.python import to_bytes
|
||||
from scrapy.utils.request import referer_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -199,7 +200,7 @@ class FilesPipeline(MediaPipeline):
|
||||
if age_days > self.EXPIRES:
|
||||
return # returning None force download
|
||||
|
||||
referer = _get_referer(request)
|
||||
referer = referer_str(request)
|
||||
logger.debug(
|
||||
'File (uptodate): Downloaded %(medianame)s from %(request)s '
|
||||
'referred in <%(referer)s>',
|
||||
@ -225,7 +226,7 @@ class FilesPipeline(MediaPipeline):
|
||||
|
||||
def media_failed(self, failure, request, info):
|
||||
if not isinstance(failure.value, IgnoreRequest):
|
||||
referer = _get_referer(request)
|
||||
referer = referer_str(request)
|
||||
logger.warning(
|
||||
'File (unknown-error): Error downloading %(medianame)s from '
|
||||
'%(request)s referred in <%(referer)s>: %(exception)s',
|
||||
@ -237,7 +238,7 @@ class FilesPipeline(MediaPipeline):
|
||||
raise FileException
|
||||
|
||||
def media_downloaded(self, response, request, info):
|
||||
referer = _get_referer(request)
|
||||
referer = referer_str(request)
|
||||
|
||||
if response.status != 200:
|
||||
logger.warning(
|
||||
@ -339,11 +340,3 @@ class FilesPipeline(MediaPipeline):
|
||||
def file_key(self, url):
    """Return whatever ``self.file_path(url)`` returns for the given *url*."""
    path = self.file_path(url)
    return path
|
||||
file_key._base = True
|
||||
|
||||
|
||||
def _get_referer(request):
    """Return the request's ``Referer`` header in a form suitable for logging.

    Looks up ``'Referer'`` in ``request.headers``. When the header is absent
    the result is ``None``; otherwise the value is passed through
    ``to_native_str`` with ``errors='replace'``.
    """
    header_value = request.headers.get('Referer')
    if header_value is not None:
        return to_native_str(header_value, errors='replace')
    return None
|
||||
|
@ -8,7 +8,6 @@ import hashlib
|
||||
import weakref
|
||||
from six.moves.urllib.parse import urlunparse
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
from w3lib.http import basic_auth_header
|
||||
from scrapy.utils.python import to_bytes, to_native_str
|
||||
|
||||
@ -86,3 +85,10 @@ def request_httprepr(request):
|
||||
s += request.body
|
||||
return s
|
||||
|
||||
|
||||
def referer_str(request):
    """Return the request's ``Referer`` HTTP header suitable for logging.

    The header is read from ``request.headers``. ``None`` is returned when
    no ``Referer`` header is present; otherwise the header value is converted
    with ``to_native_str`` using ``errors='replace'``.
    """
    raw_referer = request.headers.get('Referer')
    return (
        None if raw_referer is None
        else to_native_str(raw_referer, errors='replace')
    )
|
||||
|
Loading…
x
Reference in New Issue
Block a user