mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 08:24:05 +00:00
129 lines
1.8 KiB
ReStructuredText
129 lines
1.8 KiB
ReStructuredText
.. _topics-crawlspider-v2:
|
|
|
|
==============
|
|
CrawlSpider v2
|
|
==============
|
|
|
|
Introduction
|
|
============
|
|
|
|
TODO: introduction
|
|
|
|
Rules Matching
|
|
==============
|
|
|
|
TODO: describe purpose of rules
|
|
|
|
Request Extractors & Processors
|
|
===============================
|
|
|
|
TODO: describe purpose of extractors & processors
|
|
|
|
Examples
|
|
========
|
|
|
|
TODO: plenty of examples
|
|
|
|
|
|
.. module:: scrapy.contrib_exp.crawlspider.spider
|
|
:synopsis: CrawlSpider
|
|
|
|
|
|
Reference
|
|
=========
|
|
|
|
CrawlSpider
|
|
-----------
|
|
|
|
TODO: describe crawlspider
|
|
|
|
.. class:: CrawlSpider
|
|
|
|
TODO: describe CrawlSpider class
|
|
|
|
|
|
.. module:: scrapy.contrib_exp.crawlspider.rules
|
|
:synopsis: Rules
|
|
|
|
Rules
|
|
-----
|
|
|
|
TODO: describe spider rules
|
|
|
|
.. class:: Rule
|
|
|
|
TODO: describe Rule class
|
|
|
|
|
|
.. module:: scrapy.contrib_exp.crawlspider.reqext
|
|
:synopsis: Request Extractors
|
|
|
|
Request Extractors
|
|
------------------
|
|
|
|
TODO: describe extractors purpose
|
|
|
|
.. class:: BaseSgmlRequestExtractor
|
|
|
|
TODO: describe base extractor
|
|
|
|
.. class:: SgmlRequestExtractor
|
|
|
|
TODO: describe sgml extractor
|
|
|
|
.. class:: XPathRequestExtractor
|
|
|
|
TODO: describe xpath request extractor
|
|
|
|
|
|
.. module:: scrapy.contrib_exp.crawlspider.reqproc
|
|
:synopsis: Request Processors
|
|
|
|
Request Processors
|
|
------------------
|
|
|
|
TODO: describe request processors
|
|
|
|
.. class:: Canonicalize
|
|
|
|
TODO: describe canonicalize
|
|
|
|
.. class:: Unique
|
|
|
|
TODO: describe unique
|
|
|
|
.. class:: FilterDomain
|
|
|
|
TODO: describe filter domain
|
|
|
|
.. class:: FilterUrl
|
|
|
|
TODO: describe filter url
|
|
|
|
|
|
.. module:: scrapy.contrib_exp.crawlspider.matchers
|
|
:synopsis: Matchers
|
|
|
|
Request/Response Matchers
|
|
-------------------------
|
|
|
|
TODO: describe matchers
|
|
|
|
.. class:: BaseMatcher
|
|
|
|
TODO: describe base matcher
|
|
|
|
.. class:: UrlMatcher
|
|
|
|
TODO: describe url matcher
|
|
|
|
.. class:: UrlRegexMatcher
|
|
|
|
TODO: describe url regex matcher
|
|
|
|
.. class:: UrlListMatcher
|
|
|
|
TODO: describe url list matcher
|
|
|
|
|