1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 04:44:26 +00:00

Fixed the ExtractImages adaptor, which wasn't looking for a <base> tag in the response body

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40414
This commit is contained in:
elpolilla 2008-11-25 01:41:22 +00:00
parent 5028146e8c
commit f558a49d2c

View File

@ -3,6 +3,7 @@ Adaptors related with extraction of data
"""
import urlparse
import re
from scrapy.http import Response
from scrapy.utils.python import flatten
from scrapy.xpath.selector import XPathSelector, XPathSelectorList
@ -51,7 +52,19 @@ class ExtractImages(object):
Output: list of unicodes
"""
def __init__(self, response=None, base_url=None):
    """Determine the base URL that image locations are resolved against.

    When a response is given, the first 4096 characters of its body are
    searched for a ``<base href="...">`` tag. If one is found, its href
    (resolved against ``response.url``, so relative hrefs work) becomes
    the base URL; otherwise ``response.url`` is used. Without a response,
    the explicit ``base_url`` argument is used unchanged.

    Args:
        response: optional object providing ``url`` and
            ``body.to_string()``; takes precedence over ``base_url``.
        base_url: optional base URL, used only when no response is given.

    Raises:
        AttributeError: if no base URL could be determined.
    """
    BASETAG_RE = re.compile(r'<base\s+href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']', re.I)

    if response:
        # Only the head of the document is scanned for the tag.
        match = BASETAG_RE.search(response.body.to_string()[0:4096])
        if match:
            # A <base> href may itself be relative (e.g. "/static/");
            # joining it with the page URL yields an absolute base, and
            # leaves an already-absolute href as it is.
            self.base_url = urlparse.urljoin(response.url, match.group(1))
        else:
            self.base_url = response.url
    else:
        self.base_url = base_url

    # Fail at construction time instead of on the first call.
    if not self.base_url:
        raise AttributeError('You must specify either a response or a base_url to the ExtractImages adaptor.')
def extract_from_xpath(self, selector):
ret = []
@ -76,9 +89,6 @@ class ExtractImages(object):
return ret
def __call__(self, locations):
if not self.base_url:
raise AttributeError('You must specify either a response or a base_url to the ExtractImages adaptor.')
if isinstance(locations, basestring):
locations = [locations]