
removed scrapyengine import from downloader code, minor improvements to docstrings

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40684
Pablo Hoffman 2009-01-08 14:21:20 +00:00
parent f39b7b507a
commit 6420c407d2
2 changed files with 11 additions and 10 deletions

@@ -43,15 +43,18 @@ class SiteDetails(object):
 class Downloader(object):
-    """Maintain many concurrent downloads and provide an HTTP abstraction
-
-    We will have a limited number of connections per domain and scrape many domains in
+    """Maintain many concurrent downloads and provide an HTTP abstraction.
+
+    It supports a limited number of connections per domain and many domains in
     parallel.
 
+    request(..) should be called to request resources using http, https or file
+    protocols.
     """
 
-    def __init__(self) :
+    def __init__(self, engine):
+        """Create the downloader.
+
+        ``engine`` is the scrapy engine controlling this downloader
+        """
+        self.engine = engine
         self.sites = {}
         self.middleware = DownloaderMiddlewareManager()
         self.middleware.download_function = self.enqueue
@@ -113,10 +116,8 @@ class Downloader(object):
             self._download(request, spider, deferred)
 
         if site.closed and site.is_idle():
-            # XXX: Remove scrapyengine reference
             del self.sites[domain]
-            from scrapy.core.engine import scrapyengine
-            scrapyengine.closed_domain(domain=domain)
+            self.engine.closed_domain(domain)
 
     def _download(self, request, spider, deferred):
         log.msg('Activating %s' % request.traceinfo(), log.TRACE)

@@ -56,7 +56,7 @@ class ExecutionEngine(object):
         Configure execution engine with the given scheduling policy and downloader.
         """
         self.scheduler = scheduler or Scheduler()
-        self.downloader = downloader or Downloader()
+        self.downloader = downloader or Downloader(self)
         self.spidermiddleware = SpiderMiddlewareManager()
         self._scraping = {}
         self.pipeline = ItemPipelineManager()
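
Taken together, the hunks replace a module-level singleton lookup with constructor injection: instead of the downloader importing scrapyengine from scrapy.core.engine inside a method (a circular dependency between the two modules, flagged by the removed XXX comment), the engine now passes itself to Downloader.__init__ and the downloader notifies it through that reference. Below is a minimal standalone sketch of the resulting wiring; the class bodies are simplified and _close_if_idle is a hypothetical helper condensing the queue-processing code from the downloader hunk above, not the real scrapy method:

    class Downloader(object):
        def __init__(self, engine):
            # The engine is injected at construction time, so no
            # "from scrapy.core.engine import scrapyengine" is needed
            # inside methods and the import cycle is broken.
            self.engine = engine
            self.sites = {}

        def _close_if_idle(self, domain, site):
            # Hypothetical condensation of the logic in the second
            # downloader hunk: a closed, idle site is dropped and the
            # engine is notified through the injected reference.
            if site.closed and site.is_idle():
                del self.sites[domain]
                self.engine.closed_domain(domain)

    class ExecutionEngine(object):
        def __init__(self, downloader=None):
            # As in the engine hunk: the engine owns the downloader
            # and hands itself over.
            self.downloader = downloader or Downloader(self)

        def closed_domain(self, domain):
            print("closed domain: %s" % domain)

    engine = ExecutionEngine()
    assert engine.downloader.engine is engine

The callback could still have been reached through the old global, but injecting the engine keeps the downloader module importable without scrapy.core.engine and makes the dependency explicit and easier to test.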