import unittest

import six

from scrapy.spiders import Spider
from scrapy.http import Request, Response
from scrapy.item import Item, Field
from scrapy.logformatter import LogFormatter


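# The tests below exercise scrapy.logformatter.LogFormatter, whose methods
# return a dict of logging kwargs; each test renders the lazy
# 'msg' % 'args' pair by hand and compares the finished log line.
# CustomItem is a small fixture item with a readable __str__.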
class CustomItem(Item):
    name = Field()

    def __str__(self):
        return "name: %s" % self['name']


class LoggingContribTest(unittest.TestCase):

    def setUp(self):
        self.formatter = LogFormatter()
        self.spider = Spider('default')

    def test_crawled(self):
        req = Request("http://www.example.com")
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        # Render the lazy msg/args pair the same way the logging module would.
        logline = logkws['msg'] % logkws['args']
        self.assertEqual(logline,
            "Crawled (200) <GET http://www.example.com> (referer: None)")

        # A referer header and response flags should both show up in the line.
        req = Request("http://www.example.com",
                      headers={'referer': 'http://example.com'})
        res = Response("http://www.example.com", flags=['cached'])
        logkws = self.formatter.crawled(req, res, self.spider)
        logline = logkws['msg'] % logkws['args']
        self.assertEqual(logline,
            "Crawled (200) <GET http://www.example.com> (referer: http://example.com) ['cached']")
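
    def test_crawled_msg_args_render_via_logging(self):
        # Added sketch, not part of the original suite: the msg/args pair is
        # meant for the stdlib logging module's lazy %-style formatting, so a
        # LogRecord built from it should render the same line the assertions
        # above format by hand. The method name and the direct use of
        # logging.LogRecord are illustrative assumptions, not Scrapy API.
        import logging
        req = Request("http://www.example.com")
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        record = logging.LogRecord("scrapy", logging.DEBUG, __file__, 0,
                                   logkws['msg'], logkws['args'], None)
        self.assertEqual(record.getMessage(),
            "Crawled (200) <GET http://www.example.com> (referer: None)")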

    def test_dropped(self):
        item = {}
        exception = Exception(u"\u2018")
        response = Response("http://www.example.com")
        logkws = self.formatter.dropped(item, exception, response, self.spider)
        logline = logkws['msg'] % logkws['args']
        lines = logline.splitlines()
        # Every line must stay unicode even when the exception message
        # contains non-ASCII characters.
        assert all(isinstance(x, six.text_type) for x in lines)
        self.assertEqual(lines, [u"Dropped: \u2018", '{}'])

    def test_scraped(self):
        item = CustomItem()
        item['name'] = u'\xa3'
        response = Response("http://www.example.com")
        logkws = self.formatter.scraped(item, response, self.spider)
        logline = logkws['msg'] % logkws['args']
        lines = logline.splitlines()
        assert all(isinstance(x, six.text_type) for x in lines)
        self.assertEqual(lines,
            [u"Scraped from <200 http://www.example.com>", u'name: \xa3'])
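
    def test_logformatter_adapter_sketch(self):
        # Added sketch, not part of the original suite: Scrapy's engine feeds
        # these dicts through scrapy.utils.log.logformatter_adapter to build
        # the positional arguments for logging.Logger.log(). This assumes the
        # adapter returns a (level, msg, args) tuple when 'args' is a
        # non-empty dict, as it does in Scrapy 1.0.
        from scrapy.utils.log import logformatter_adapter
        req = Request("http://www.example.com")
        res = Response("http://www.example.com")
        logkws = self.formatter.crawled(req, res, self.spider)
        level, msg, args = logformatter_adapter(logkws)
        self.assertEqual(msg % args,
            "Crawled (200) <GET http://www.example.com> (referer: None)")

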
if __name__ == "__main__":
    unittest.main()