diff --git a/scrapy/commands/__init__.py b/scrapy/commands/__init__.py
index a573a03d9..9f8e6986a 100644
--- a/scrapy/commands/__init__.py
+++ b/scrapy/commands/__init__.py
@@ -23,7 +23,8 @@ class ScrapyCommand:
         self.settings = None  # set in scrapy.cmdline
 
     def set_crawler(self, crawler):
-        assert not hasattr(self, '_crawler'), "crawler already set"
+        if hasattr(self, '_crawler'):
+            raise RuntimeError("crawler already set")
         self._crawler = crawler
 
     def syntax(self):
diff --git a/scrapy/contracts/default.py b/scrapy/contracts/default.py
index 3002fc702..a1b0f8f22 100644
--- a/scrapy/contracts/default.py
+++ b/scrapy/contracts/default.py
@@ -58,7 +58,11 @@ class ReturnsContract(Contract):
 
     def __init__(self, *args, **kwargs):
         super(ReturnsContract, self).__init__(*args, **kwargs)
-        assert len(self.args) in [1, 2, 3]
+        if len(self.args) not in [1, 2, 3]:
+            raise ValueError(
+                "Incorrect argument quantity: expected 1, 2 or 3, got %i"
+                % len(self.args)
+            )
         self.obj_name = self.args[0] or None
         self.obj_type = self.objects[self.obj_name]
 
diff --git a/scrapy/core/downloader/middleware.py b/scrapy/core/downloader/middleware.py
index 5a03dcdf7..4c2eea522 100644
--- a/scrapy/core/downloader/middleware.py
+++ b/scrapy/core/downloader/middleware.py
@@ -45,8 +45,9 @@ class DownloaderMiddlewareManager(MiddlewareManager):
 
         @defer.inlineCallbacks
        def process_response(response):
-            assert response is not None, 'Received None in process_response'
-            if isinstance(response, Request):
+            if response is None:
+                raise TypeError("Received None in process_response")
+            elif isinstance(response, Request):
                 return response
 
             for method in self.methods['process_response']:
diff --git a/scrapy/core/engine.py b/scrapy/core/engine.py
index 66cf9ad9a..77d71846e 100644
--- a/scrapy/core/engine.py
+++ b/scrapy/core/engine.py
@@ -73,7 +73,8 @@ class ExecutionEngine:
     @defer.inlineCallbacks
     def start(self):
         """Start the execution engine"""
-        assert not self.running, "Engine already running"
+        if self.running:
+            raise RuntimeError("Engine already running")
         self.start_time = time()
         yield self.signals.send_catch_log_deferred(signal=signals.engine_started)
         self.running = True
@@ -82,7 +83,8 @@ class ExecutionEngine:
 
     def stop(self):
         """Stop the execution engine gracefully"""
-        assert self.running, "Engine not running"
+        if not self.running:
+            raise RuntimeError("Engine not running")
         self.running = False
         dfd = self._close_all_spiders()
         return dfd.addBoth(lambda _: self._finish_stopping_engine())
@@ -165,7 +167,11 @@ class ExecutionEngine:
         return d
 
     def _handle_downloader_output(self, response, request, spider):
-        assert isinstance(response, (Request, Response, Failure)), response
+        if not isinstance(response, (Request, Response, Failure)):
+            raise TypeError(
+                "Incorrect type: expected Request, Response or Failure, got %s: %r"
+                % (type(response), response)
+            )
         # downloader middleware can return requests (for example, redirects)
         if isinstance(response, Request):
             self.crawl(response, spider)
@@ -205,8 +211,8 @@ class ExecutionEngine:
         return not bool(self.slot)
 
     def crawl(self, request, spider):
-        assert spider in self.open_spiders, \
-            "Spider %r not opened when crawling: %s" % (spider.name, request)
+        if spider not in self.open_spiders:
+            raise RuntimeError("Spider %r not opened when crawling: %s" % (spider.name, request))
         self.schedule(request, spider)
         self.slot.nextcall.schedule()
 
@@ -232,7 +238,11 @@ class ExecutionEngine:
             slot.add_request(request)
 
             def _on_success(response):
-                assert isinstance(response, (Response, Request))
+                if not isinstance(response, (Response, Request)):
+                    raise TypeError(
+                        "Incorrect type: expected Response or Request, got %s: %r"
+                        % (type(response), response)
+                    )
                 if isinstance(response, Response):
                     response.request = request  # tie request to response received
                     logkws = self.logformatter.crawled(request, response, spider)
@@ -253,8 +263,8 @@ class ExecutionEngine:
 
     @defer.inlineCallbacks
     def open_spider(self, spider, start_requests=(), close_if_idle=True):
-        assert self.has_capacity(), "No free spider slot when opening %r" % \
-            spider.name
+        if not self.has_capacity():
+            raise RuntimeError("No free spider slot when opening %r" % spider.name)
         logger.info("Spider opened", extra={'spider': spider})
         nextcall = CallLaterOnce(self._next_request, spider)
         scheduler = self.scheduler_cls.from_crawler(self.crawler)
diff --git a/scrapy/core/scraper.py b/scrapy/core/scraper.py
index 3e4826216..edbb4dd66 100644
--- a/scrapy/core/scraper.py
+++ b/scrapy/core/scraper.py
@@ -123,7 +123,11 @@ class Scraper:
     def _scrape(self, response, request, spider):
         """Handle the downloaded response or failure through the spider
         callback/errback"""
-        assert isinstance(response, (Response, Failure))
+        if not isinstance(response, (Response, Failure)):
+            raise TypeError(
+                "Incorrect type: expected Response or Failure, got %s: %r"
+                % (type(response), response)
+            )
 
         dfd = self._scrape2(response, request, spider)  # returns spider's processed output
         dfd.addErrback(self.handle_spider_error, request, response, spider)
diff --git a/scrapy/crawler.py b/scrapy/crawler.py
index 20990ea41..6f43771e2 100644
--- a/scrapy/crawler.py
+++ b/scrapy/crawler.py
@@ -78,7 +78,8 @@ class Crawler:
 
     @defer.inlineCallbacks
     def crawl(self, *args, **kwargs):
-        assert not self.crawling, "Crawling already taking place"
+        if self.crawling:
+            raise RuntimeError("Crawling already taking place")
         self.crawling = True
 
         try:
diff --git a/scrapy/http/request/__init__.py b/scrapy/http/request/__init__.py
index 0a6637af8..a98ba9960 100644
--- a/scrapy/http/request/__init__.py
+++ b/scrapy/http/request/__init__.py
@@ -24,7 +24,8 @@ class Request(object_ref):
         self.method = str(method).upper()
         self._set_url(url)
         self._set_body(body)
-        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
+        if not isinstance(priority, int):
+            raise TypeError("Request priority not an integer: %r" % priority)
         self.priority = priority
 
         if callback is not None and not callable(callback):
diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py
index aab645d3d..ae365db5b 100644
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@@ -106,7 +106,8 @@ class S3FilesStore:
         else:
             from boto.s3.connection import S3Connection
             self.S3Connection = S3Connection
-        assert uri.startswith('s3://')
+        if not uri.startswith("s3://"):
+            raise ValueError("Incorrect URI scheme in %s, expected 's3'" % uri)
         self.bucket, self.prefix = uri[5:].split('/', 1)
 
     def stat_file(self, path, info):
@@ -266,7 +267,8 @@ class FTPFilesStore:
     USE_ACTIVE_MODE = None
 
     def __init__(self, uri):
-        assert uri.startswith('ftp://')
+        if not uri.startswith("ftp://"):
+            raise ValueError("Incorrect URI scheme in %s, expected 'ftp'" % uri)
         u = urlparse(uri)
         self.port = u.port
         self.host = u.hostname
diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py
index b71419111..5e15bf0c8 100644
--- a/scrapy/utils/iterators.py
+++ b/scrapy/utils/iterators.py
@@ -128,10 +128,12 @@ def csviter(obj, delimiter=None, headers=None, encoding=None, quotechar=None):
 
 def _body_or_str(obj, unicode=True):
     expected_types = (Response, str, bytes)
-    assert isinstance(obj, expected_types), \
-        "obj must be %s, not %s" % (
-            " or ".join(t.__name__ for t in expected_types),
-            type(obj).__name__)
+    if not isinstance(obj, expected_types):
+        expected_types_str = " or ".join(t.__name__ for t in expected_types)
+        raise TypeError(
+            "Object %r must be %s, not %s"
+            % (obj, expected_types_str, type(obj).__name__)
+        )
     if isinstance(obj, Response):
         if not unicode:
             return obj.body
diff --git a/scrapy/utils/reactor.py b/scrapy/utils/reactor.py
index 5308812d6..3c705f69b 100644
--- a/scrapy/utils/reactor.py
+++ b/scrapy/utils/reactor.py
@@ -9,7 +9,8 @@ from scrapy.utils.misc import load_object
 def listen_tcp(portrange, host, factory):
     """Like reactor.listenTCP but tries different ports in a range."""
     from twisted.internet import reactor
-    assert len(portrange) <= 2, "invalid portrange: %s" % portrange
+    if len(portrange) > 2:
+        raise ValueError("invalid portrange: %s" % portrange)
     if not portrange:
         return reactor.listenTCP(0, factory, interface=host)
     if not hasattr(portrange, '__iter__'):
diff --git a/tests/test_utils_iterators.py b/tests/test_utils_iterators.py
index ec8311298..a85087619 100644
--- a/tests/test_utils_iterators.py
+++ b/tests/test_utils_iterators.py
@@ -157,7 +157,7 @@ class XmliterTestCase(unittest.TestCase):
 
     def test_xmliter_objtype_exception(self):
         i = self.xmliter(42, 'product')
-        self.assertRaises(AssertionError, next, i)
+        self.assertRaises(TypeError, next, i)
 
     def test_xmliter_encoding(self):
         body = b'<?xml version="1.0" encoding="iso-8859-9"?>\n<xml>\n    <item>Some Turkish Characters \xd6\xc7\xde\xdd\xd0\xdc \xfc\xf0\xfd\xfe\xe7\xf6</item>\n</xml>\n\n'
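
Note on the pattern (not part of the patch): the diff swaps `assert` for explicit `raise` throughout because CPython strips assert statements entirely when run with the -O/-OO flags, so assertion-based argument checking silently vanishes in optimized mode; an explicit raise is always enforced and lets callers catch a specific exception type (TypeError, ValueError, RuntimeError) instead of a generic AssertionError. Below is a minimal, self-contained sketch of the difference; the function names are hypothetical, but the check mirrors the Request priority validation above:

    # demo.py (hypothetical) -- run `python demo.py`, then `python -O demo.py`
    def set_priority_assert(priority):
        # Stripped under -O: bad input passes through with no error.
        assert isinstance(priority, int), "Request priority not an integer: %r" % priority
        return priority

    def set_priority_raise(priority):
        # Enforced under any interpreter flags.
        if not isinstance(priority, int):
            raise TypeError("Request priority not an integer: %r" % priority)
        return priority

    try:
        set_priority_assert("high")   # AssertionError normally; returns "high" under -O
    except AssertionError as e:
        print("assert caught:", e)

    try:
        set_priority_raise("high")    # TypeError in both modes
    except TypeError as e:
        print("raise caught:", e)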