1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 21:03:40 +00:00

spidermw: check for __iter__ instead of calling iter(), which could let a string pass as an iterable

This commit is contained in:
Daniel Grana 2009-05-28 21:10:30 -03:00
parent 0f690b03dc
commit cfafa01109

View File

@ -14,10 +14,7 @@ from scrapy.utils.middleware import build_middleware_list
from scrapy.conf import settings from scrapy.conf import settings
def _isiterable(possible_iterator): def _isiterable(possible_iterator):
try: return hasattr(possible_iterator, '__iter__')
return iter(possible_iterator)
except TypeError:
return None
class SpiderMiddlewareManager(object): class SpiderMiddlewareManager(object):
def __init__(self): def __init__(self):
@ -63,16 +60,7 @@ class SpiderMiddlewareManager(object):
(fname(method), type(result)) (fname(method), type(result))
if result is not None: if result is not None:
return result return result
return self.call(request=request, response=response, spider=spider) return self.call(request, response, spider)
def process_spider_output(result):
for method in self.result_middleware:
result = method(response=response, result=result, spider=spider)
assert _isiterable(result), \
'Middleware %s must returns an iterable object, got %s ' % \
(fname(method), type(result))
return result
def process_spider_exception(_failure): def process_spider_exception(_failure):
exception = _failure.value exception = _failure.value
@ -85,12 +73,20 @@ class SpiderMiddlewareManager(object):
return result return result
return _failure return _failure
def process_spider_output(result):
for method in self.result_middleware:
result = method(response=response, result=result, spider=spider)
assert _isiterable(result), \
'Middleware %s must returns an iterable object, got %s ' % \
(fname(method), type(result))
return result
dfd = mustbe_deferred(process_spider_input, response) dfd = mustbe_deferred(process_spider_input, response)
dfd.addErrback(process_spider_exception) dfd.addErrback(process_spider_exception)
dfd.addCallback(process_spider_output) dfd.addCallback(process_spider_output)
return dfd return dfd
def call(self, request, response, spider): def call(self, request, result, spider):
defer_result(response).chainDeferred(request.deferred) defer_result(result).chainDeferred(request.deferred)
request.deferred.addCallback(arg_to_iter) request.deferred.addCallback(arg_to_iter)
return request.deferred return request.deferred