From ff3e299eb0c6fe486ecdaafaae340bb55a0c226e Mon Sep 17 00:00:00 2001 From: Omer Schleifer Date: Mon, 20 Feb 2017 16:42:29 +0200 Subject: [PATCH] [MRG+2] add flags to request (#2082) * add flags to request * fix test - add flags to request * fix test(2) - add flags to request * fix test(2) - add flags to request * Updated test to reqser with flags field of request * Updated documentation with flags field of request * fix test indentation * fix test failed * make the change backward compatible * remove unrequired spaces, fix documentation request flags * remove unrequired space * fix assert equal * flags default is empty list * Add flags to request * add flags to request * fix test - add flags to request * fix test(2) - add flags to request * fix test(2) - add flags to request * Updated test to reqser with flags field of request * Updated documentation with flags field of request * fix test indentation * fix test failed * make the change backward compatible * remove unrequired spaces, fix documentation request flags * remove unrequired space * fix assert equal * flags default is empty list * add flags to request squashed commits --- docs/topics/request-response.rst | 3 +++ scrapy/http/request/__init__.py | 3 ++- scrapy/logformatter.py | 8 +++++--- scrapy/utils/reqser.py | 4 +++- tests/test_logformatter.py | 8 ++++++++ tests/test_utils_reqser.py | 4 +++- 6 files changed, 24 insertions(+), 6 deletions(-) diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst index 9e0dee000..9a6e0d1b6 100644 --- a/docs/topics/request-response.rst +++ b/docs/topics/request-response.rst @@ -121,6 +121,9 @@ Request objects see :ref:`topics-request-response-ref-errbacks` below. :type errback: callable + :param flags: Flags sent to the request, can be used for logging or similar purposes. + :type flags: list + .. attribute:: Request.url A string containing the URL of this request. 
Keep in mind that this diff --git a/scrapy/http/request/__init__.py b/scrapy/http/request/__init__.py index f48325a0f..1435d91de 100644 --- a/scrapy/http/request/__init__.py +++ b/scrapy/http/request/__init__.py @@ -18,7 +18,7 @@ class Request(object_ref): def __init__(self, url, callback=None, method='GET', headers=None, body=None, cookies=None, meta=None, encoding='utf-8', priority=0, - dont_filter=False, errback=None): + dont_filter=False, errback=None, flags=None): self._encoding = encoding # this one has to be set first self.method = str(method).upper() @@ -36,6 +36,7 @@ class Request(object_ref): self.dont_filter = dont_filter self._meta = dict(meta) if meta else None + self.flags = [] if flags is None else list(flags) @property def meta(self): diff --git a/scrapy/logformatter.py b/scrapy/logformatter.py index 2160d9ab0..e7bf7942e 100644 --- a/scrapy/logformatter.py +++ b/scrapy/logformatter.py @@ -7,7 +7,7 @@ from scrapy.utils.request import referer_str SCRAPEDMSG = u"Scraped from %(src)s" + os.linesep + "%(item)s" DROPPEDMSG = u"Dropped: %(exception)s" + os.linesep + "%(item)s" -CRAWLEDMSG = u"Crawled (%(status)s) %(request)s (referer: %(referer)s)%(flags)s" +CRAWLEDMSG = u"Crawled (%(status)s) %(request)s%(request_flags)s (referer: %(referer)s)%(response_flags)s" class LogFormatter(object): @@ -32,15 +32,17 @@ class LogFormatter(object): """ def crawled(self, request, response, spider): - flags = ' %s' % str(response.flags) if response.flags else '' + request_flags = ' %s' % str(request.flags) if request.flags else '' + response_flags = ' %s' % str(response.flags) if response.flags else '' return { 'level': logging.DEBUG, 'msg': CRAWLEDMSG, 'args': { 'status': response.status, 'request': request, + 'request_flags' : request_flags, 'referer': referer_str(request), - 'flags': flags, + 'response_flags': response_flags, } } diff --git a/scrapy/utils/reqser.py b/scrapy/utils/reqser.py index 2fceb0d94..959dddbd5 100644 --- a/scrapy/utils/reqser.py +++ 
b/scrapy/utils/reqser.py @@ -32,6 +32,7 @@ def request_to_dict(request, spider=None): '_encoding': request._encoding, 'priority': request.priority, 'dont_filter': request.dont_filter, + 'flags': request.flags } if type(request) is not Request: d['_class'] = request.__module__ + '.' + request.__class__.__name__ @@ -62,7 +63,8 @@ def request_from_dict(d, spider=None): meta=d['meta'], encoding=d['_encoding'], priority=d['priority'], - dont_filter=d['dont_filter']) + dont_filter=d['dont_filter'], + flags=d.get('flags')) def _find_method(obj, func): diff --git a/tests/test_logformatter.py b/tests/test_logformatter.py index 50e9662c6..11fe7b653 100644 --- a/tests/test_logformatter.py +++ b/tests/test_logformatter.py @@ -36,6 +36,14 @@ class LoggingContribTest(unittest.TestCase): self.assertEqual(logline, "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']") + def test_flags_in_request(self): + req = Request("http://www.example.com", flags=['test','flag']) + res = Response("http://www.example.com") + logkws = self.formatter.crawled(req, res, self.spider) + logline = logkws['msg'] % logkws['args'] + self.assertEqual(logline, + "Crawled (200) <GET http://www.example.com> ['test', 'flag'] (referer: None)") + def test_dropped(self): item = {} exception = Exception(u"\u2018") diff --git a/tests/test_utils_reqser.py b/tests/test_utils_reqser.py index 5b889ab5d..073baadc2 100644 --- a/tests/test_utils_reqser.py +++ b/tests/test_utils_reqser.py @@ -25,7 +25,8 @@ class RequestSerializationTest(unittest.TestCase): cookies={'currency': u'руб'}, encoding='latin-1', priority=20, - meta={'a': 'b'}) + meta={'a': 'b'}, + flags=['testFlag']) self._assert_serializes_ok(r) def test_latin1_body(self): @@ -54,6 +55,7 @@ class RequestSerializationTest(unittest.TestCase): self.assertEqual(r1._encoding, r2._encoding) self.assertEqual(r1.priority, r2.priority) self.assertEqual(r1.dont_filter, r2.dont_filter) + self.assertEqual(r1.flags, r2.flags) def test_request_class(self): r = FormRequest("http://www.example.com")