diff --git a/scrapy/linkextractors/htmlparser.py b/scrapy/linkextractors/htmlparser.py
index dcc261b31..9867e1179 100644
--- a/scrapy/linkextractors/htmlparser.py
+++ b/scrapy/linkextractors/htmlparser.py
@@ -3,6 +3,7 @@ HTMLParser-based link extractor
 """
 
 import warnings
+import six
 from six.moves.html_parser import HTMLParser
 from six.moves.urllib.parse import urljoin
 
@@ -39,7 +40,7 @@ class HtmlParserLinkExtractor(HTMLParser):
         ret = []
         base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
         for link in links:
-            if isinstance(link.url, unicode):
+            if isinstance(link.url, six.text_type):
                 link.url = link.url.encode(response_encoding)
             try:
                 link.url = urljoin(base_url, link.url)
diff --git a/scrapy/linkextractors/sgml.py b/scrapy/linkextractors/sgml.py
index 9938e071f..c68dae4c8 100644
--- a/scrapy/linkextractors/sgml.py
+++ b/scrapy/linkextractors/sgml.py
@@ -1,6 +1,7 @@
 """
 SGMLParser-based Link extractors
 """
+import six
 from six.moves.urllib.parse import urljoin
 import warnings
 from sgmllib import SGMLParser
@@ -40,7 +41,7 @@ class BaseSgmlLinkExtractor(SGMLParser):
         if base_url is None:
             base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
         for link in self.links:
-            if isinstance(link.url, unicode):
+            if isinstance(link.url, six.text_type):
                 link.url = link.url.encode(response_encoding)
             try:
                 link.url = urljoin(base_url, link.url)