1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 15:23:40 +00:00

Preserve tracebacks better. Fixes GH-1760.

This commit is contained in:
Mikhail Korobov 2016-02-17 23:07:03 +05:00
parent 06da7af9e2
commit f766dd0ba8
2 changed files with 27 additions and 2 deletions

View File

@ -3,6 +3,7 @@ import signal
import logging
import warnings
import sys
from twisted.internet import reactor, defer
from zope.interface.verify import verifyClass, DoesNotImplement
@ -73,11 +74,21 @@ class Crawler(object):
yield self.engine.open_spider(self.spider, start_requests)
yield defer.maybeDeferred(self.engine.start)
except Exception:
exc = defer.fail()
# In Python 2 reraising an exception after yield discards
# the original traceback (see http://bugs.python.org/issue7563),
# so sys.exc_info() workaround is used.
# This workaround also works in Python 3, but it is not needed,
# and it is slower, so in Python 3 we use native `raise`.
if six.PY2:
exc_info = sys.exc_info()
self.crawling = False
if self.engine is not None:
yield self.engine.close()
yield exc
if six.PY2:
raise six.reraise(*exc_info)
raise
def _create_spider(self, *args, **kwargs):
    """Build the spider object for this crawler.

    Delegates to ``self.spidercls.from_crawler``, passing this crawler as
    the first argument and forwarding all positional and keyword arguments
    unchanged. Returns whatever ``from_crawler`` returns.
    """
    build_spider = self.spidercls.from_crawler
    return build_spider(self, *args, **kwargs)

View File

@ -204,6 +204,20 @@ class MySpider(scrapy.Spider):
log = to_native_str(p.stderr.read())
self.assertIn("Unable to load", log)
def test_start_requests_errors(self):
    # Regression check for traceback preservation: a spider whose
    # ``start_requests`` raises must leave both the failing method name and
    # the spider file name in the process's stderr output.
    #
    # The spider source below is handed to ``self.runspider`` as a string
    # (presumably written out as ``badspider.py`` and executed -- confirm
    # against the helper). It therefore has to be valid Python by itself:
    # the class body and the method body must be indented under their
    # headers, otherwise the run fails on a syntax error before
    # ``start_requests`` is ever called.
    p = self.runspider("""
import scrapy
class BadSpider(scrapy.Spider):
    name = "bad"
    def start_requests(self):
        raise Exception("oops!")
""", name="badspider.py")
    log = to_native_str(p.stderr.read())
    # Echo the captured stderr so the full log is at hand when one of the
    # assertions below fails.
    print(log)
    # The method that raised must be named in the log ...
    self.assertIn("start_requests", log)
    # ... and so must the file that defines the spider.
    self.assertIn("badspider.py", log)
class ParseCommandTest(ProcessTest, SiteTest, CommandTest):
command = 'parse'