added rule shorthand for creating CrawlSpider rules
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40864
commit b20ac057b3
parent 84f63b146b
--- a/scrapy/contrib/spiders/__init__.py
+++ b/scrapy/contrib/spiders/__init__.py
@@ -1,2 +1,2 @@
-from scrapy.contrib.spiders.crawl import CrawlSpider, Rule
+from scrapy.contrib.spiders.crawl import CrawlSpider, Rule, rule
 from scrapy.contrib.spiders.feed import XMLFeedSpider, CSVFeedSpider
--- a/scrapy/contrib/spiders/crawl.py
+++ b/scrapy/contrib/spiders/crawl.py
@@ -10,6 +10,7 @@ import copy
 from scrapy.http import Request
 from scrapy.spider import BaseSpider
 from scrapy.conf import settings
+from scrapy.link.extractors import RegexLinkExtractor
 
 class Rule(object):
     """
@@ -45,6 +46,14 @@ class Rule(object):
         else:
             self.follow = follow
 
+
+def rule(regex, callback=None, exclude=None, follow=None):
+    """Shorthand for creating CrawlSpider rules"""
+    deny = [exclude] if exclude else []
+    link_extractor = RegexLinkExtractor(allow=[regex], deny=deny)
+    return Rule(link_extractor, callback=callback, follow=follow)
+
+
 class CrawlSpider(BaseSpider):
     """
     Class for spiders that crawl over web pages and extract/parse their links
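
For context, here is a minimal sketch of how the new shorthand might be used in a spider, next to the equivalent long form that constructs the link extractor by hand. The spider name, domain, regexes, and callback below are hypothetical illustrations, not part of this commit.

    from scrapy.contrib.spiders import CrawlSpider, Rule, rule
    from scrapy.link.extractors import RegexLinkExtractor


    class ExampleSpider(CrawlSpider):
        # hypothetical spider: name, domain and regexes are illustrative only
        domain_name = 'example.com'
        start_urls = ['http://www.example.com/']

        rules = (
            # new shorthand: a single allow regex, plus optional
            # exclude (mapped to the extractor's deny list) and follow
            rule(r'product\.html', callback='parse_product'),

            # equivalent long form without the shorthand
            Rule(RegexLinkExtractor(allow=[r'category\.html']), follow=True),
        )

        def parse_product(self, response):
            pass  # extraction logic would go here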