1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-22 21:44:02 +00:00

scrapy report util improved

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40134
This commit is contained in:
elpolilla 2008-07-29 01:14:01 +00:00
parent 02b87f7d49
commit 640e8b9131
2 changed files with 15 additions and 11 deletions

View File

@ -20,7 +20,7 @@ class Command(ScrapyCommand):
parser.add_option("--record", dest="record", help="use FILE for recording session (see replay command)", metavar="FILE")
parser.add_option("--record-dir", dest="recorddir", help="use DIR for recording (instead of file)", metavar="DIR")
parser.add_option("--report", dest="doreport", action='store_true', help="generate a report of the scraped products in a text file")
parser.add_option("--report-dropped", dest="doreport_dropped", action="store_true", help="choose whether to report dropped products or not")
parser.add_option("--report-dropped", dest="doreport_dropped", action="store_true", help="generate a report of the dropped products in a text file")
def process_options(self, args, opts):
ScrapyCommand.process_options(self, args, opts)
@ -38,8 +38,8 @@ class Command(ScrapyCommand):
# disconnecting since pydispatcher uses weak references
self.replay = Replay(opts.record or opts.recorddir, mode='record', usedir=bool(opts.recorddir))
self.replay.record(args=args, opts=opts.__dict__)
if opts.doreport:
self.report = Report(dropped=opts.doreport_dropped)
if opts.doreport or opts.doreport_dropped:
self.report = Report(passed=opts.doreport, dropped=opts.doreport_dropped)
def run(self, args, opts):
scrapymanager.runonce(*args, **opts.__dict__)

View File

@ -4,16 +4,18 @@ from pydispatch import dispatcher
from scrapy.core import signals
class Report(object):
def __init__(self, dropped):
def __init__(self, passed, dropped):
self.domain = ''
self.passed_file = None
self.dropped_file = None
self.passed = passed
self.dropped = dropped
self.total = { 'passed': 0, 'dropped': 0 }
dispatcher.connect(self.domain_open, signal=signals.domain_open)
dispatcher.connect(self.item_passed, signal=signals.item_passed)
dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)
if self.passed:
dispatcher.connect(self.item_passed, signal=signals.item_passed)
if self.dropped:
dispatcher.connect(self.item_dropped, signal=signals.item_dropped)
@ -28,7 +30,7 @@ class Report(object):
if product.variants:
product_text = '%s\n##Variants\n%s' % (product_text, ''.join([self.get_product_attribs(variant) for variant in product.variants]))
if dropped:
product_text = '%sdropping reason: %s\n' % (product_text, dropped)
product_text = '%s--- Dropping reason: %s ---\n' % (product_text, dropped)
product_text = product_text + '\n\n'
return product_text
@ -36,9 +38,10 @@ class Report(object):
self.domain = domain
now = datetime.now()
filename = '%s_%s_%s.report' % (self.domain, now.strftime('%Y%m%d'), now.strftime('%H%M'))
self.passed_file = open(filename, 'w')
self.passed_file.write('Scraping results for domain "%s"\n\n%s%s%s' % (self.domain, '##################################\n',
'### Products scraped correctly ###\n', '##################################\n'))
if self.passed:
self.passed_file = open(filename, 'w')
self.passed_file.write('Scraping results for domain "%s"\n\n%s%s%s' % (self.domain, '##################################\n',
'### Products scraped correctly ###\n', '##################################\n'))
if self.dropped:
self.dropped_file = open(filename + '.dropped', 'w')
self.dropped_file.write('Scraping results for domain "%s"\n\n%s%s%s' % (self.domain,
@ -53,8 +56,9 @@ class Report(object):
self.dropped_file.write(self.get_product_text(item, exception))
def engine_stopped(self):
self.passed_file.write('\n--- Total scraped products: %d\n' % self.total['passed'])
self.passed_file.close()
if self.passed:
self.passed_file.write('\n--- Total scraped products: %d\n' % self.total['passed'])
self.passed_file.close()
if self.dropped:
self.dropped_file.write('\n--- Total dropped products: %d\n' % self.total['dropped'])
self.dropped_file.close()