1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 15:43:44 +00:00

added rule shorthand, for creating CrawlSpider rules

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40864
This commit is contained in:
Ismael Carnales 2009-02-18 11:18:11 +00:00
parent 84f63b146b
commit b20ac057b3
2 changed files with 10 additions and 1 deletion

View File

@ -1,2 +1,2 @@
from scrapy.contrib.spiders.crawl import CrawlSpider, Rule
from scrapy.contrib.spiders.crawl import CrawlSpider, Rule, rule
from scrapy.contrib.spiders.feed import XMLFeedSpider, CSVFeedSpider

View File

@ -10,6 +10,7 @@ import copy
from scrapy.http import Request
from scrapy.spider import BaseSpider
from scrapy.conf import settings
from scrapy.link.extractors import RegexLinkExtractor
class Rule(object):
"""
@ -45,6 +46,14 @@ class Rule(object):
else:
self.follow = follow
def rule(regex, callback=None, exclude=None, follow=None):
    """Shorthand factory for building CrawlSpider rules.

    Creates a RegexLinkExtractor that allows links matching *regex*
    (optionally denying links matching *exclude*) and wraps it in a Rule
    with the given *callback* and *follow* settings.
    """
    deny_patterns = []
    if exclude:
        # Only pass a deny pattern when one was actually supplied.
        deny_patterns.append(exclude)
    extractor = RegexLinkExtractor(allow=[regex], deny=deny_patterns)
    return Rule(extractor, callback=callback, follow=follow)
class CrawlSpider(BaseSpider):
"""
Class for spiders that crawl over web pages and extract/parse their links