mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-27 08:43:43 +00:00
added parse2 command (candidate to replace parse command)
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40448
This commit is contained in:
parent
6d5a130b96
commit
778b77f2bb
52
scrapy/trunk/scrapy/command/commands/parse2.py
Normal file
52
scrapy/trunk/scrapy/command/commands/parse2.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from scrapy.command import ScrapyCommand
|
||||
from scrapy.fetcher import fetch
|
||||
from scrapy.http import Request
|
||||
from scrapy.item import ScrapedItem
|
||||
from scrapy.spider import spiders
|
||||
from scrapy.utils import display
|
||||
from scrapy import log
|
||||
|
||||
class Command(ScrapyCommand):
    """Fetch a URL, run one spider method on the response and print the results.

    The command takes exactly two positional arguments: the URL to fetch and
    the name of the spider method to call with the fetched response. The
    spider is looked up from the response URL via ``spiders.fromurl``.
    """

    def syntax(self):
        """Return the command-line argument synopsis for this command."""
        return "[options] <url> <method>"

    def short_desc(self):
        """Return a one-line description of this command."""
        return "Parse the URL with the given spider method and print the results"

    def add_options(self, parser):
        """Register the base options plus this command's output switches.

        ``parser`` is expected to expose ``add_option`` (optparse-style);
        all three switches are boolean flags that default to unset.
        """
        ScrapyCommand.add_options(self, parser)
        parser.add_option("--nolinks", dest="nolinks", action="store_true",
                          help="don't show extracted links")
        parser.add_option("--noitems", dest="noitems", action="store_true",
                          help="don't show scraped items")
        parser.add_option("--nocolour", dest="nocolour", action="store_true",
                          help="avoid using pygments to colorize the output")

    def pipeline_process(self, item, opts):
        """Return ``item`` unchanged.

        Hook applied to every scraped item before it is displayed; this
        implementation ignores ``opts`` and does no processing.
        """
        return item

    def run(self, args, opts):
        """Fetch the URL, call the named spider method and print its output.

        ``args`` must be ``[url, method_name]``; otherwise a usage message is
        printed and nothing is fetched. Unless suppressed with ``--noitems`` /
        ``--nolinks``, the scraped items and the extracted links (``Request``
        objects) returned by the method are pretty-printed.
        """
        if len(args) != 2:
            print("A URL and method is required")
            return

        url, method_name = args
        for response in fetch([url]):
            spider = spiders.fromurl(response.url)
            if not spider:
                log.msg('cannot find spider for url: %s' % response.url, level=log.ERROR)
                continue

            # Keep the method *name* intact: rebinding it to the bound method
            # would break the lookup for any subsequent response.
            callback = getattr(spider, method_name, None)
            if callback is None:
                log.msg('cannot find method %s in spider for url: %s'
                        % (method_name, response.url), level=log.ERROR)
                continue

            # Materialize once: the callback may return a generator (or None),
            # and the results are scanned twice below.
            results = list(callback(response) or ())

            links = [r for r in results if isinstance(r, Request)]
            items = [self.pipeline_process(r, opts)
                     for r in results if isinstance(r, ScrapedItem)]
            for item in items:
                # Drop the '_adaptors_dict' attribute so it is not part of the
                # printed item (presumably internal bookkeeping).
                item.__dict__.pop('_adaptors_dict', None)

            display.nocolour = opts.nocolour
            if not opts.noitems:
                print("# Scraped Items " + "-" * 60)
                display.pprint(items)

            if not opts.nolinks:
                print("# Links " + "-" * 68)
                display.pprint(links)
|
Loading…
x
Reference in New Issue
Block a user