Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-26 17:24:38 +00:00)
added some references to documentation and fixed some doc typos (thanks Patrick)
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40813
This commit is contained in:
parent c672223091
commit 16efbf87fc
@@ -1,5 +1,5 @@
 """
-Common downloader middleare
+Common downloader middleware
 
 See documentation in docs/ref/downloader-middleware.rst
 """
@@ -32,7 +32,7 @@ class CompressionMiddleware(object):
             body = zlib.decompress(body)
         except zlib.error:
             # ugly hack to work with raw deflate content that may
-            # be sent by microsof servers. For more information, see:
+            # be sent by microsoft servers. For more information, see:
             # http://carsten.codimi.de/gzip.yaws/
             # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
             # http://www.gzip.org/zlib/zlib_faq.html#faq38
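The "ugly hack" referenced in this hunk handles servers that emit raw DEFLATE data without the zlib header, which makes a plain zlib.decompress() call raise zlib.error. Below is a minimal standalone sketch of that fallback technique, not the middleware's actual code; the helper name is invented for illustration:

import zlib

def decompress_deflate(body):
    """Decompress a deflate-encoded body, tolerating raw deflate streams.

    Some servers (notably certain Microsoft/IIS setups) send deflate bodies
    without the zlib header, so plain zlib.decompress() raises zlib.error.
    Passing -zlib.MAX_WBITS tells zlib to expect a headerless (raw) stream.
    """
    try:
        return zlib.decompress(body)
    except zlib.error:
        # raw deflate: negative window bits disable the zlib header check
        return zlib.decompress(body, -zlib.MAX_WBITS)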
@@ -2,7 +2,7 @@
 An extension to retry failed requests that are potentially caused by temporary
 problems such as a connection timeout or HTTP 500 error.
 
-You can change the behaviour of this moddileware by modifing the scraping settings:
+You can change the behaviour of this middleware by modifing the scraping settings:
 RETRY_TIMES - how many times to retry a failed page
 RETRY_HTTP_CODES - which HTTP response codes to retry
 
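The two settings named in this docstring would be configured in a project's settings module. A hypothetical configuration sketch, with values chosen purely for illustration rather than taken from this revision:

# settings.py -- illustrative values only
RETRY_TIMES = 2                          # how many times to retry a failed page
RETRY_HTTP_CODES = [500, 502, 503, 504]  # which HTTP response codes to retry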
@@ -1,3 +1,10 @@
+"""
+This modules implements the CrawlSpider which is the recommended spider to use
+for scraping typical web sites that requires crawling pages.
+
+See documentation in docs/ref/spiders.rst
+"""
+
 import copy
 
 from scrapy.http import Request
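For context, a CrawlSpider pairs link-extraction rules with callbacks so that page discovery and item parsing are declared rather than hand-coded. The sketch below uses the modern Scrapy import paths and Rule/LinkExtractor API, which differ from this early revision; the spider name, domain, and URL patterns are invented for illustration:

from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class ExampleCrawlSpider(CrawlSpider):
    name = "example"
    allowed_domains = ["example.com"]
    start_urls = ["http://example.com/"]

    rules = (
        # follow category listing pages without parsing items from them
        Rule(LinkExtractor(allow=r"/category/"), follow=True),
        # parse product pages with the callback below
        Rule(LinkExtractor(allow=r"/item/\d+"), callback="parse_item"),
    )

    def parse_item(self, response):
        yield {
            "url": response.url,
            "title": response.css("title::text").get(),
        }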
@@ -1,4 +1,10 @@
 # -*- coding: utf8 -*-
+"""
+This module implements the XMLFeedSpider which is the recommended spider to use
+for scraping from an XML feed.
+
+See documentation in docs/ref/spiders.rst
+"""
 from scrapy.spider import BaseSpider
 from scrapy.item import ScrapedItem
 from scrapy.http import Request
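An XMLFeedSpider iterates over a repeated node in a feed and calls a parse method once per node. Here is a sketch using the modern Scrapy API (itertag/parse_node), which postdates this revision; the feed URL and field names are made up:

from scrapy.spiders import XMLFeedSpider

class ExampleFeedSpider(XMLFeedSpider):
    name = "feed"
    start_urls = ["http://example.com/feed.xml"]
    iterator = "iternodes"  # streaming node iterator
    itertag = "item"        # parse_node() is called once per <item> element

    def parse_node(self, response, node):
        yield {
            "title": node.xpath("title/text()").get(),
            "link": node.xpath("link/text()").get(),
        }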
@@ -1,5 +1,7 @@
 """
 LinkExtractor provides en efficient way to extract links from pages
+
+See documentation in docs/ref/link-extractors.rst
 """
 
 from scrapy.utils.python import FixedSGMLParser, unique as unique_list
@@ -1,7 +1,8 @@
 """
-This module provides some LinkExtractors, which extend that base LinkExtractor
-(scrapy.link.LinkExtractor) with some useful features.
+This module provides some LinkExtractors, which extend the base LinkExtractor
+(scrapy.link.LinkExtractor) with some addutional useful features.
+
+See documentation in docs/ref/link-extractors.rst
 """
 
 import re
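A link extractor is typically instantiated with allow/deny patterns and asked for the links on a response from inside a spider callback. The sketch below targets the modern scrapy.linkextractors API, which replaced the SGML-based extractors touched here; the spider name and patterns are illustrative:

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Spider

class DocsLinkSpider(Spider):
    name = "docs-links"
    start_urls = ["http://example.com/"]

    def parse(self, response):
        # extract_links() returns Link objects carrying .url and .text
        extractor = LinkExtractor(allow=r"/docs/", deny=r"\.pdf$")
        for link in extractor.extract_links(response):
            yield {"url": link.url, "text": link.text}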
@@ -1,5 +1,7 @@
 """
-Base class for scrapy spiders
+Base class for Scrapy spiders
+
+See documentation in docs/ref/spiders.rst
 """
 from zope.interface import Interface, Attribute, invariant, implements
 from twisted.plugin import IPlugin