1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-03-14 13:08:16 +00:00

Remove assertions from production code (#4440)

This commit is contained in:
Eugenio Lacuesta 2020-04-23 07:40:10 -03:00 committed by GitHub
parent e4750f2fbd
commit efb6f13deb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 50 additions and 23 deletions

View File

@@ -23,7 +23,8 @@ class ScrapyCommand:
self.settings = None # set in scrapy.cmdline
def set_crawler(self, crawler):
assert not hasattr(self, '_crawler'), "crawler already set"
if hasattr(self, '_crawler'):
raise RuntimeError("crawler already set")
self._crawler = crawler
def syntax(self):

View File

@@ -58,7 +58,11 @@ class ReturnsContract(Contract):
def __init__(self, *args, **kwargs):
super(ReturnsContract, self).__init__(*args, **kwargs)
assert len(self.args) in [1, 2, 3]
if len(self.args) not in [1, 2, 3]:
raise ValueError(
"Incorrect argument quantity: expected 1, 2 or 3, got %i"
% len(self.args)
)
self.obj_name = self.args[0] or None
self.obj_type = self.objects[self.obj_name]

View File

@@ -45,8 +45,9 @@ class DownloaderMiddlewareManager(MiddlewareManager):
@defer.inlineCallbacks
def process_response(response):
assert response is not None, 'Received None in process_response'
if isinstance(response, Request):
if response is None:
raise TypeError("Received None in process_response")
elif isinstance(response, Request):
return response
for method in self.methods['process_response']:

View File

@@ -73,7 +73,8 @@ class ExecutionEngine:
@defer.inlineCallbacks
def start(self):
"""Start the execution engine"""
assert not self.running, "Engine already running"
if self.running:
raise RuntimeError("Engine already running")
self.start_time = time()
yield self.signals.send_catch_log_deferred(signal=signals.engine_started)
self.running = True
@@ -82,7 +83,8 @@ class ExecutionEngine:
def stop(self):
"""Stop the execution engine gracefully"""
assert self.running, "Engine not running"
if not self.running:
raise RuntimeError("Engine not running")
self.running = False
dfd = self._close_all_spiders()
return dfd.addBoth(lambda _: self._finish_stopping_engine())
@@ -165,7 +167,11 @@ class ExecutionEngine:
return d
def _handle_downloader_output(self, response, request, spider):
assert isinstance(response, (Request, Response, Failure)), response
if not isinstance(response, (Request, Response, Failure)):
raise TypeError(
"Incorrect type: expected Request, Response or Failure, got %s: %r"
% (type(response), response)
)
# downloader middleware can return requests (for example, redirects)
if isinstance(response, Request):
self.crawl(response, spider)
@@ -205,8 +211,8 @@ class ExecutionEngine:
return not bool(self.slot)
def crawl(self, request, spider):
assert spider in self.open_spiders, \
"Spider %r not opened when crawling: %s" % (spider.name, request)
if spider not in self.open_spiders:
raise RuntimeError("Spider %r not opened when crawling: %s" % (spider.name, request))
self.schedule(request, spider)
self.slot.nextcall.schedule()
@@ -232,7 +238,11 @@ class ExecutionEngine:
slot.add_request(request)
def _on_success(response):
assert isinstance(response, (Response, Request))
if not isinstance(response, (Response, Request)):
raise TypeError(
"Incorrect type: expected Response or Request, got %s: %r"
% (type(response), response)
)
if isinstance(response, Response):
response.request = request # tie request to response received
logkws = self.logformatter.crawled(request, response, spider)
@@ -253,8 +263,8 @@ class ExecutionEngine:
@defer.inlineCallbacks
def open_spider(self, spider, start_requests=(), close_if_idle=True):
assert self.has_capacity(), "No free spider slot when opening %r" % \
spider.name
if not self.has_capacity():
raise RuntimeError("No free spider slot when opening %r" % spider.name)
logger.info("Spider opened", extra={'spider': spider})
nextcall = CallLaterOnce(self._next_request, spider)
scheduler = self.scheduler_cls.from_crawler(self.crawler)

View File

@@ -123,7 +123,11 @@ class Scraper:
def _scrape(self, response, request, spider):
"""Handle the downloaded response or failure through the spider
callback/errback"""
assert isinstance(response, (Response, Failure))
if not isinstance(response, (Response, Failure)):
raise TypeError(
"Incorrect type: expected Response or Failure, got %s: %r"
% (type(response), response)
)
dfd = self._scrape2(response, request, spider) # returns spider's processed output
dfd.addErrback(self.handle_spider_error, request, response, spider)

View File

@@ -78,7 +78,8 @@ class Crawler:
@defer.inlineCallbacks
def crawl(self, *args, **kwargs):
assert not self.crawling, "Crawling already taking place"
if self.crawling:
raise RuntimeError("Crawling already taking place")
self.crawling = True
try:

View File

@@ -24,7 +24,8 @@ class Request(object_ref):
self.method = str(method).upper()
self._set_url(url)
self._set_body(body)
assert isinstance(priority, int), "Request priority not an integer: %r" % priority
if not isinstance(priority, int):
raise TypeError("Request priority not an integer: %r" % priority)
self.priority = priority
if callback is not None and not callable(callback):

View File

@@ -106,7 +106,8 @@ class S3FilesStore:
else:
from boto.s3.connection import S3Connection
self.S3Connection = S3Connection
assert uri.startswith('s3://')
if not uri.startswith("s3://"):
raise ValueError("Incorrect URI scheme in %s, expected 's3'" % uri)
self.bucket, self.prefix = uri[5:].split('/', 1)
def stat_file(self, path, info):
@@ -266,7 +267,8 @@ class FTPFilesStore:
USE_ACTIVE_MODE = None
def __init__(self, uri):
assert uri.startswith('ftp://')
if not uri.startswith("ftp://"):
raise ValueError("Incorrect URI scheme in %s, expected 'ftp'" % uri)
u = urlparse(uri)
self.port = u.port
self.host = u.hostname

View File

@@ -128,10 +128,12 @@ def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
def _body_or_str(obj, unicode=True):
expected_types = (Response, str, bytes)
assert isinstance(obj, expected_types), \
"obj must be %s, not %s" % (
" or ".join(t.__name__ for t in expected_types),
type(obj).__name__)
if not isinstance(obj, expected_types):
expected_types_str = " or ".join(t.__name__ for t in expected_types)
raise TypeError(
"Object %r must be %s, not %s"
% (obj, expected_types_str, type(obj).__name__)
)
if isinstance(obj, Response):
if not unicode:
return obj.body

View File

@@ -9,7 +9,8 @@ from scrapy.utils.misc import load_object
def listen_tcp(portrange, host, factory):
"""Like reactor.listenTCP but tries different ports in a range."""
from twisted.internet import reactor
assert len(portrange) <= 2, "invalid portrange: %s" % portrange
if len(portrange) > 2:
raise ValueError("invalid portrange: %s" % portrange)
if not portrange:
return reactor.listenTCP(0, factory, interface=host)
if not hasattr(portrange, '__iter__'):

View File

@@ -157,7 +157,7 @@ class XmliterTestCase(unittest.TestCase):
def test_xmliter_objtype_exception(self):
i = self.xmliter(42, 'product')
self.assertRaises(AssertionError, next, i)
self.assertRaises(TypeError, next, i)
def test_xmliter_encoding(self):
body = b'<?xml version="1.0" encoding="ISO-8859-9"?>\n<xml>\n <item>Some Turkish Characters \xd6\xc7\xde\xdd\xd0\xdc \xfc\xf0\xfd\xfe\xe7\xf6</item>\n</xml>\n\n'