mirror of
https://github.com/scrapy/scrapy.git
synced 2025-03-14 13:08:16 +00:00
Remove assertions from production code (#4440)
This commit is contained in:
parent
e4750f2fbd
commit
efb6f13deb
@@ -23,7 +23,8 @@ class ScrapyCommand:
         self.settings = None # set in scrapy.cmdline
 
     def set_crawler(self, crawler):
-        assert not hasattr(self, '_crawler'), "crawler already set"
+        if hasattr(self, '_crawler'):
+            raise RuntimeError("crawler already set")
         self._crawler = crawler
 
     def syntax(self):
@@ -58,7 +58,11 @@ class ReturnsContract(Contract):
     def __init__(self, *args, **kwargs):
         super(ReturnsContract, self).__init__(*args, **kwargs)
 
-        assert len(self.args) in [1, 2, 3]
+        if len(self.args) not in [1, 2, 3]:
+            raise ValueError(
+                "Incorrect argument quantity: expected 1, 2 or 3, got %i"
+                % len(self.args)
+            )
         self.obj_name = self.args[0] or None
         self.obj_type = self.objects[self.obj_name]
 
@@ -45,8 +45,9 @@ class DownloaderMiddlewareManager(MiddlewareManager):
 
         @defer.inlineCallbacks
         def process_response(response):
-            assert response is not None, 'Received None in process_response'
-            if isinstance(response, Request):
+            if response is None:
+                raise TypeError("Received None in process_response")
+            elif isinstance(response, Request):
                 return response
 
             for method in self.methods['process_response']:
@@ -73,7 +73,8 @@ class ExecutionEngine:
     @defer.inlineCallbacks
     def start(self):
         """Start the execution engine"""
-        assert not self.running, "Engine already running"
+        if self.running:
+            raise RuntimeError("Engine already running")
         self.start_time = time()
         yield self.signals.send_catch_log_deferred(signal=signals.engine_started)
         self.running = True
@@ -82,7 +83,8 @@ class ExecutionEngine:
 
     def stop(self):
         """Stop the execution engine gracefully"""
-        assert self.running, "Engine not running"
+        if not self.running:
+            raise RuntimeError("Engine not running")
         self.running = False
         dfd = self._close_all_spiders()
         return dfd.addBoth(lambda _: self._finish_stopping_engine())
@@ -165,7 +167,11 @@ class ExecutionEngine:
         return d
 
     def _handle_downloader_output(self, response, request, spider):
-        assert isinstance(response, (Request, Response, Failure)), response
+        if not isinstance(response, (Request, Response, Failure)):
+            raise TypeError(
+                "Incorrect type: expected Request, Response or Failure, got %s: %r"
+                % (type(response), response)
+            )
         # downloader middleware can return requests (for example, redirects)
         if isinstance(response, Request):
             self.crawl(response, spider)
@@ -205,8 +211,8 @@ class ExecutionEngine:
         return not bool(self.slot)
 
     def crawl(self, request, spider):
-        assert spider in self.open_spiders, \
-            "Spider %r not opened when crawling: %s" % (spider.name, request)
+        if spider not in self.open_spiders:
+            raise RuntimeError("Spider %r not opened when crawling: %s" % (spider.name, request))
         self.schedule(request, spider)
         self.slot.nextcall.schedule()
 
@@ -232,7 +238,11 @@ class ExecutionEngine:
         slot.add_request(request)
 
         def _on_success(response):
-            assert isinstance(response, (Response, Request))
+            if not isinstance(response, (Response, Request)):
+                raise TypeError(
+                    "Incorrect type: expected Response or Request, got %s: %r"
+                    % (type(response), response)
+                )
             if isinstance(response, Response):
                 response.request = request # tie request to response received
                 logkws = self.logformatter.crawled(request, response, spider)
@@ -253,8 +263,8 @@ class ExecutionEngine:
 
     @defer.inlineCallbacks
     def open_spider(self, spider, start_requests=(), close_if_idle=True):
-        assert self.has_capacity(), "No free spider slot when opening %r" % \
-            spider.name
+        if not self.has_capacity():
+            raise RuntimeError("No free spider slot when opening %r" % spider.name)
         logger.info("Spider opened", extra={'spider': spider})
         nextcall = CallLaterOnce(self._next_request, spider)
         scheduler = self.scheduler_cls.from_crawler(self.crawler)
@@ -123,7 +123,11 @@ class Scraper:
     def _scrape(self, response, request, spider):
         """Handle the downloaded response or failure through the spider
         callback/errback"""
-        assert isinstance(response, (Response, Failure))
+        if not isinstance(response, (Response, Failure)):
+            raise TypeError(
+                "Incorrect type: expected Response or Failure, got %s: %r"
+                % (type(response), response)
+            )
 
         dfd = self._scrape2(response, request, spider) # returns spider's processed output
         dfd.addErrback(self.handle_spider_error, request, response, spider)
@@ -78,7 +78,8 @@ class Crawler:
 
     @defer.inlineCallbacks
     def crawl(self, *args, **kwargs):
-        assert not self.crawling, "Crawling already taking place"
+        if self.crawling:
+            raise RuntimeError("Crawling already taking place")
         self.crawling = True
 
         try:
@@ -24,7 +24,8 @@ class Request(object_ref):
         self.method = str(method).upper()
         self._set_url(url)
         self._set_body(body)
-        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
+        if not isinstance(priority, int):
+            raise TypeError("Request priority not an integer: %r" % priority)
         self.priority = priority
 
         if callback is not None and not callable(callback):
@@ -106,7 +106,8 @@ class S3FilesStore:
         else:
             from boto.s3.connection import S3Connection
             self.S3Connection = S3Connection
-        assert uri.startswith('s3://')
+        if not uri.startswith("s3://"):
+            raise ValueError("Incorrect URI scheme in %s, expected 's3'" % uri)
         self.bucket, self.prefix = uri[5:].split('/', 1)
 
     def stat_file(self, path, info):
@@ -266,7 +267,8 @@ class FTPFilesStore:
     USE_ACTIVE_MODE = None
 
     def __init__(self, uri):
-        assert uri.startswith('ftp://')
+        if not uri.startswith("ftp://"):
+            raise ValueError("Incorrect URI scheme in %s, expected 'ftp'" % uri)
         u = urlparse(uri)
         self.port = u.port
         self.host = u.hostname
@@ -128,10 +128,12 @@ def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
 
 def _body_or_str(obj, unicode=True):
     expected_types = (Response, str, bytes)
-    assert isinstance(obj, expected_types), \
-        "obj must be %s, not %s" % (
-            " or ".join(t.__name__ for t in expected_types),
-            type(obj).__name__)
+    if not isinstance(obj, expected_types):
+        expected_types_str = " or ".join(t.__name__ for t in expected_types)
+        raise TypeError(
+            "Object %r must be %s, not %s"
+            % (obj, expected_types_str, type(obj).__name__)
+        )
     if isinstance(obj, Response):
         if not unicode:
             return obj.body
@@ -9,7 +9,8 @@ from scrapy.utils.misc import load_object
 def listen_tcp(portrange, host, factory):
     """Like reactor.listenTCP but tries different ports in a range."""
     from twisted.internet import reactor
-    assert len(portrange) <= 2, "invalid portrange: %s" % portrange
+    if len(portrange) > 2:
+        raise ValueError("invalid portrange: %s" % portrange)
     if not portrange:
         return reactor.listenTCP(0, factory, interface=host)
     if not hasattr(portrange, '__iter__'):
@@ -157,7 +157,7 @@ class XmliterTestCase(unittest.TestCase):
 
     def test_xmliter_objtype_exception(self):
         i = self.xmliter(42, 'product')
-        self.assertRaises(AssertionError, next, i)
+        self.assertRaises(TypeError, next, i)
 
     def test_xmliter_encoding(self):
         body = b'<?xml version="1.0" encoding="ISO-8859-9"?>\n<xml>\n <item>Some Turkish Characters \xd6\xc7\xde\xdd\xd0\xdc \xfc\xf0\xfd\xfe\xe7\xf6</item>\n</xml>\n\n'
Loading…
x
Reference in New Issue
Block a user