1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 03:03:47 +00:00

Merge pull request #1197 from torymur/master

Test for robotstxt error
This commit is contained in:
Pablo Hoffman 2015-04-30 23:39:36 -03:00
commit 4d43b25abf

View File

@@ -1,7 +1,8 @@
from __future__ import absolute_import
import re
from twisted.internet import reactor
from twisted.internet import reactor, error
from twisted.internet.defer import Deferred
from twisted.python import failure
from twisted.trial import unittest
from scrapy.downloadermiddlewares.robotstxt import RobotsTxtMiddleware
from scrapy.exceptions import IgnoreRequest, NotConfigured
@@ -41,6 +42,26 @@ class RobotsTxtMiddlewareTest(unittest.TestCase):
reactor.callFromThread(deferred.callback, None)
return deferred
def test_robotstxt_error(self):
    """A failed robots.txt download (DNS error) must be routed to the
    middleware's error logger rather than propagating.

    The engine's ``download`` is stubbed to return a Deferred that
    errbacks with a DNSLookupError from the reactor thread, simulating
    an unresolvable robots.txt host.
    """
    crawler = mock.MagicMock()
    crawler.settings = Settings()
    crawler.settings.set('ROBOTSTXT_OBEY', True)
    crawler.engine.download = mock.MagicMock()
    err = error.DNSLookupError('Robotstxt address not found')

    def return_failure(request, spider):
        deferred = Deferred()
        # errback from the reactor thread so the failure is delivered
        # asynchronously, as it would be for a real download error
        reactor.callFromThread(deferred.errback, failure.Failure(err))
        return deferred
    crawler.engine.download.side_effect = return_failure

    middleware = RobotsTxtMiddleware(crawler)
    middleware._logerror = mock.MagicMock()
    middleware.process_request(Request('http://site.local'), None)

    deferred = Deferred()
    # Use addCallback, not addErrback: this Deferred is fired below via
    # callback(None), so an errback would never run and the assertion
    # would be silently skipped. Check ``.called`` instead of
    # ``assert_any_call()`` because _logerror is invoked WITH arguments
    # (the failure and the request), and assert_any_call() with no args
    # only matches a zero-argument call.
    deferred.addCallback(lambda _: self.assertTrue(middleware._logerror.called))
    reactor.callFromThread(deferred.callback, None)
    return deferred
def assertNotIgnored(self, request, middleware):
    """Assert that the middleware lets *request* through, i.e. that
    ``process_request`` returns ``None`` instead of raising/ignoring."""
    # process_request does not use the spider argument here, so None is fine
    result = middleware.process_request(request, None)
    self.assertIsNone(result)