Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-26 22:04:01 +00:00)

Commit 99b76eaa2c (parent 19bcb44c25)
SEP-017 contracts: first draft
.gitignore (vendored): 6 additions

@@ -1,6 +1,12 @@
*.pyc
*swp
*~

_trial_temp
dropin.cache
docs/build
*egg-info
.tox

build/
dist/
scrapy/commands/check.py (new file): 50 lines

@@ -0,0 +1,50 @@
from functools import wraps

from scrapy.command import ScrapyCommand
from scrapy.http import Request

from scrapy.contracts import Contract

class Command(ScrapyCommand):
    requires_project = True

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check contracts for given spider"

    def run(self, args, opts):
        self.crawler.engine.has_capacity = lambda: True

        for spider in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider)
            requests = self.get_requests(spider)
            self.crawler.crawl(spider, requests)

        self.crawler.start()

    def get_requests(self, spider):
        requests = []

        for key, value in vars(type(spider)).iteritems():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = Request(url='http://scrapy.org', callback=bound_method)

                # register contract hooks to the request
                contracts = Contract.from_method(value)
                for contract in contracts:
                    request = contract.prepare_request(request)

                # discard anything the request might return
                cb = request.callback
                @wraps(cb)
                def wrapper(response):
                    cb(response)

                request.callback = wrapper

                requests.append(request)

        return requests
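The check command above builds one Request per spider method that has a docstring and wires in whatever contracts that docstring declares, so using the feature is a matter of annotating callbacks. Below is a minimal sketch of such a spider, using the @url and @scrapes tags defined in scrapy/contracts/default.py further down; the spider, item, fields and URL are invented for illustration, and BaseSpider plus Item/Field are assumed to be the spider and item base classes of this era of Scrapy.

# Hypothetical spider annotated with contracts; everything except the @url and
# @scrapes tag syntax is made up for illustration.
from scrapy.spider import BaseSpider
from scrapy.item import Item, Field

class ProductItem(Item):
    name = Field()
    price = Field()

class ExampleSpider(BaseSpider):
    name = 'example'

    def parse_product(self, response):
        """ Extract a single product from a product page.

        @url http://www.example.com/some/product
        @scrapes name price
        """
        item = ProductItem()
        item['name'] = 'placeholder'
        item['price'] = '9.99'
        return item

Running the command (presumably invoked as `scrapy check example`, following Scrapy's command naming) would fetch the @url address instead of the placeholder http://scrapy.org, call parse_product on the response, and let ScrapesContract assert that the returned item has its name and price fields populated.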
scrapy/contracts/__init__.py (new file): 2 lines

@@ -0,0 +1,2 @@
from .base import Contract, ContractType
from .default import *
scrapy/contracts/base.py (new file): 74 lines

@@ -0,0 +1,74 @@
import re
from functools import wraps

from scrapy.utils.spider import iterate_spider_output

class ContractType(type):
    """ Metaclass for contracts
        - automatically registers contracts in the root `Contract` class
    """

    def __new__(meta, name, bases, dct):
        # only allow single inheritance
        assert len(bases) == 1, 'Multiple inheritance is not allowed'
        base = bases[0]

        # ascend in the inheritance chain
        while type(base) not in [type, meta]:
            base = type(base)

        # register this as a valid contract
        cls = type.__new__(meta, name, bases, dct)
        if type(base) != type:
            base.registered[cls.name] = cls
        return cls


class Contract(object):
    """ Abstract class for contracts
        - keeps a reference to all derived classes in `registered`
    """

    __metaclass__ = ContractType
    registered = {}

    def __init__(self, method, *args):
        self.method = method
        self.args = args

    @classmethod
    def from_method(cls, method):
        contracts = []
        for line in method.__doc__.split('\n'):
            line = line.strip()

            if line.startswith('@'):
                name, args = re.match(r'@(\w+)\s*(.*)', line).groups()
                args = re.split(r'[\,\s+]', args)
                args = filter(lambda x: x, args)

                contracts.append(cls.registered[name](method, *args))

        return contracts

    def prepare_request(self, request):
        cb = request.callback
        @wraps(cb)
        def wrapper(response):
            self.pre_process(response)
            output = list(iterate_spider_output(cb(response)))
            self.post_process(output)
            return output

        request.callback = wrapper
        request = self.modify_request(request)
        return request

    def modify_request(self, request):
        return request

    def pre_process(self, response):
        pass

    def post_process(self, output):
        pass
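Because of the ContractType metaclass, merely defining a subclass of Contract with a class-level `name` attribute registers it in Contract.registered, which is how from_method resolves docstring tags without any explicit registration call. A minimal sketch of a custom contract under that assumption follows; the @returns_200 tag and its status check are invented and not part of this commit, and the code keeps Python 2 semantics to match the `__metaclass__` usage above.

from scrapy.contracts import Contract

class Returns200Contract(Contract):
    """ Hypothetical contract: assert the response came back as HTTP 200.
        Declared in a callback docstring as: @returns_200
    """
    name = 'returns_200'

    def pre_process(self, response):
        assert response.status == 200, \
            'expected HTTP 200, got %d' % response.status

# Defining the class is enough: ContractType.__new__ has already executed
# Contract.registered['returns_200'] = Returns200Contract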
scrapy/contracts/default.py (new file): 23 lines

@@ -0,0 +1,23 @@
from scrapy.item import BaseItem

from .base import Contract


# contracts
class UrlContract(Contract):
    name = 'url'

    def modify_request(self, request):
        return request.replace(url=self.args[0])

class ReturnsRequestContract(Contract):
    name = 'returns_request'

class ScrapesContract(Contract):
    name = 'scrapes'

    def post_process(self, output):
        for x in output:
            if isinstance(x, BaseItem):
                for arg in self.args:
                    assert arg in x, '%r field is missing' % arg
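Tying the pieces together: Contract.from_method scans a callback's docstring for lines starting with '@', splits the tag from its arguments, and instantiates the class registered under that tag; for ScrapesContract the arguments are field names that post_process then asserts on every BaseItem in the callback output. A small worked example of just that parsing step (the docstring line is invented; Python 2, as in the rest of the commit):

import re

# What Contract.from_method does with a single docstring line:
line = '@scrapes name price'
name, args = re.match(r'@(\w+)\s*(.*)', line).groups()
args = re.split(r'[\,\s+]', args)
args = filter(lambda x: x, args)

print name    # 'scrapes'
print args    # ['name', 'price']

# Contract.registered['scrapes'] is ScrapesContract, so the resulting contract
# is ScrapesContract(method, 'name', 'price'); its post_process later asserts
# that every scraped item carries both fields.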