mirror of https://github.com/scrapy/scrapy.git

engine: some extra simplifications and removed debug mode

Pablo Hoffman 2009-06-22 23:01:41 -03:00
parent a1cca0da50
commit a8fd107ad4
3 changed files with 6 additions and 25 deletions

View File

@@ -395,15 +395,6 @@ The class used to detect and filter duplicate requests.
 The default (``RequestFingerprintDupeFilter``) filters based on request fingerprint
 (using ``scrapy.utils.request.request_fingerprint``) and grouping per domain.
 
-.. setting:: ENGINE_DEBUG
-
-ENGINE_DEBUG
-------------
-
-Default: ``False``
-
-Whether to enable the Scrapy Engine debugging mode.
-
 .. setting:: ENABLED_SPIDERS_FILE
 
 ENABLED_SPIDERS_FILE
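For context on the duplicate-filter documentation kept above: a minimal sketch, not part of this commit, of how a fingerprint-based duplicate filter can work. Only scrapy.utils.request.request_fingerprint comes from the docs; the seen set and is_duplicate helper are illustrative names.

# Illustrative sketch of fingerprint-based duplicate filtering; only
# request_fingerprint is taken from the documentation above, the rest is
# hypothetical glue code.
from scrapy.http import Request
from scrapy.utils.request import request_fingerprint

seen = set()  # fingerprints of requests already scheduled

def is_duplicate(request):
    fp = request_fingerprint(request)  # hash of method, canonicalized URL and body
    if fp in seen:
        return True
    seen.add(fp)
    return False

print(is_duplicate(Request('http://example.com/')))  # False: first time seen
print(is_duplicate(Request('http://example.com/')))  # True: same fingerprint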

View File

@@ -84,8 +84,6 @@ DUPEFILTER_CLASS = 'scrapy.contrib.dupefilter.RequestFingerprintDupeFilter'
 
 ENABLED_SPIDERS_FILE = ''
 
-ENGINE_DEBUG = False
-
 EXTENSIONS = [
     'scrapy.stats.corestats.CoreStats',
     'scrapy.xpath.extension.ResponseLibxml2',
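Side note, not part of the diff: a tiny sketch of how a boolean setting such as the removed ENGINE_DEBUG was read, assuming the era's `from scrapy.conf import settings` import; the engine code below used settings.getbool the same way.

# Hypothetical usage sketch; ENGINE_DEBUG is the setting this commit removes.
from scrapy.conf import settings

debug_mode = settings.getbool('ENGINE_DEBUG')  # would fall back to the default above (False)
if debug_mode:
    print('engine debug logging enabled')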

View File

@@ -24,7 +24,6 @@ from scrapy.item.pipeline import ItemPipelineManager
 from scrapy.spider import spiders
 from scrapy.spider.middleware import SpiderMiddlewareManager
 from scrapy.utils.defer import chain_deferred, deferred_imap
-from scrapy.utils.request import request_info
 from scrapy.utils.misc import load_object
 from scrapy.utils.defer import mustbe_deferred
@@ -48,8 +47,7 @@ class ExecutionEngine(object):
     def __init__(self):
         self.configured = False
         self.keep_alive = False
-        self.closing = {} # domains being closed
-        self.debug_mode = settings.getbool('ENGINE_DEBUG')
+        self.closing = {} # dict (domain -> reason) of spiders being closed
         self.tasks = []
         self.ports = []
         self.running = False
@@ -161,8 +159,7 @@ class ExecutionEngine(object):
     def next_domain(self):
         domain = self.domain_scheduler.next_domain()
         if domain:
-            spider = spiders.fromdomain(domain)
-            self.open_domain(domain, spider)
+            self.open_domain(domain)
         return domain
 
     def next_request(self, spider, now=False):
@@ -295,22 +292,18 @@ class ExecutionEngine(object):
         return self._stop_if_idle()
 
     def download(self, request, spider):
-        if self.debug_mode:
-            log.msg('Downloading %s' % request_info(request), log.DEBUG)
         domain = spider.domain_name
         referer = request.headers.get('Referer', None)
 
         def _on_success(response):
            """handle the result of a page download"""
            assert isinstance(response, (Response, Request))
-            if self.debug_mode:
-                log.msg("Requested %s" % request_info(request), level=log.DEBUG, domain=domain)
            if isinstance(response, Response):
-                response.request = request # tie request to obtained response
+                response.request = request # tie request to response received
                log.msg("Crawled %s from <%s>" % (response, referer), level=log.DEBUG, domain=domain)
                return response
            elif isinstance(response, Request):
-                newrequest = response # proper alias
+                newrequest = response
                schd = self.schedule(newrequest, spider)
                chain_deferred(schd, newrequest.deferred)
                return schd
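To make the branch in _on_success above easier to follow, here is a standalone sketch of the same pattern; it is not the engine's actual code, and schedule stands in for self.schedule.

# Standalone sketch of the pattern above: a download result is either a
# Response to pass on, or a new Request to feed back to the scheduler.
from scrapy.http import Request, Response

def on_download_result(result, request, schedule):
    if isinstance(result, Response):
        result.request = request   # tie the request to the response received
        return result              # hand the response on for scraping
    elif isinstance(result, Request):
        return schedule(result)    # e.g. a redirect produced a new request
    raise TypeError('expected Request or Response, got %r' % type(result))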
@@ -332,12 +325,11 @@ class ExecutionEngine(object):
         dwld.addBoth(_on_complete)
         return deferred
 
-    def open_domain(self, domain, spider=None):
+    def open_domain(self, domain):
         log.msg("Domain opened", domain=domain)
-        spider = spider or spiders.fromdomain(domain)
+        spider = spiders.fromdomain(domain)
         self.next_request(spider)
         self.closing.pop(domain, None)
         self.downloader.open_domain(domain)
         self.pipeline.open_domain(domain)
         self._scraping[domain] = set()
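Finally, a small sketch of what the new comment on self.closing implies; the semantics are assumed from the comment "dict (domain -> reason) of spiders being closed", and mark_closing/reopen are hypothetical helpers, not engine methods.

# Assumed semantics of the engine's `closing` dict, based on the comment
# introduced in __init__ above.
closing = {}

def mark_closing(domain, reason='finished'):
    closing[domain] = reason     # remember why this domain is shutting down

def reopen(domain):
    closing.pop(domain, None)    # a freshly opened domain is no longer closing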