mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 23:43:52 +00:00
LinkExtractor PY3 'unicode' type fix
This commit is contained in:
parent
de89b1b562
commit
e8205f6733
@ -3,6 +3,7 @@ HTMLParser-based link extractor
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
|
import six
|
||||||
from six.moves.html_parser import HTMLParser
|
from six.moves.html_parser import HTMLParser
|
||||||
from six.moves.urllib.parse import urljoin
|
from six.moves.urllib.parse import urljoin
|
||||||
|
|
||||||
@ -39,7 +40,7 @@ class HtmlParserLinkExtractor(HTMLParser):
|
|||||||
ret = []
|
ret = []
|
||||||
base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
|
base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
|
||||||
for link in links:
|
for link in links:
|
||||||
if isinstance(link.url, unicode):
|
if isinstance(link.url, six.text_type):
|
||||||
link.url = link.url.encode(response_encoding)
|
link.url = link.url.encode(response_encoding)
|
||||||
try:
|
try:
|
||||||
link.url = urljoin(base_url, link.url)
|
link.url = urljoin(base_url, link.url)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
SGMLParser-based Link extractors
|
SGMLParser-based Link extractors
|
||||||
"""
|
"""
|
||||||
|
import six
|
||||||
from six.moves.urllib.parse import urljoin
|
from six.moves.urllib.parse import urljoin
|
||||||
import warnings
|
import warnings
|
||||||
from sgmllib import SGMLParser
|
from sgmllib import SGMLParser
|
||||||
@ -40,7 +41,7 @@ class BaseSgmlLinkExtractor(SGMLParser):
|
|||||||
if base_url is None:
|
if base_url is None:
|
||||||
base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
|
base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
|
||||||
for link in self.links:
|
for link in self.links:
|
||||||
if isinstance(link.url, unicode):
|
if isinstance(link.url, six.text_type):
|
||||||
link.url = link.url.encode(response_encoding)
|
link.url = link.url.encode(response_encoding)
|
||||||
try:
|
try:
|
||||||
link.url = urljoin(base_url, link.url)
|
link.url = urljoin(base_url, link.url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user