From 42f7674619a779b029a3a4a9db8242d90f140453 Mon Sep 17 00:00:00 2001
From: "Victoria Terenina (torymur)"
Date: Thu, 2 Apr 2015 17:48:38 +0300
Subject: [PATCH] fixed unhandled error in deferred (RobotsTxtMiddleware)

---
 scrapy/contrib/downloadermiddleware/robotstxt.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scrapy/contrib/downloadermiddleware/robotstxt.py b/scrapy/contrib/downloadermiddleware/robotstxt.py
index 0ea402719..a58ecca8e 100644
--- a/scrapy/contrib/downloadermiddleware/robotstxt.py
+++ b/scrapy/contrib/downloadermiddleware/robotstxt.py
@@ -49,8 +49,14 @@ class RobotsTxtMiddleware(object):
         )
         dfd = self.crawler.engine.download(robotsreq, spider)
         dfd.addCallback(self._parse_robots)
+        dfd.addErrback(self._logerror, robotsreq, spider)
         return self._parsers[netloc]
 
+    def _logerror(self, failure, request, spider):
+        if failure.type is not IgnoreRequest:
+            log.msg(format="Error downloading %%(request)s: %s" % failure.value,
+                    level=log.ERROR, request=request, spider=spider)
+
     def _parse_robots(self, response):
         rp = robotparser.RobotFileParser(response.url)
         rp.parse(response.body.splitlines())