mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-27 07:03:55 +00:00
Add deprecation warning to HtmlParserLinkExtractor
This commit is contained in:
parent
d00e43f39c
commit
389f6e95c5
@ -2,6 +2,7 @@
|
|||||||
HTMLParser-based link extractor
|
HTMLParser-based link extractor
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import warnings
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
from six.moves.urllib.parse import urljoin
|
from six.moves.urllib.parse import urljoin
|
||||||
|
|
||||||
@ -9,12 +10,20 @@ from w3lib.url import safe_url_string
|
|||||||
|
|
||||||
from scrapy.link import Link
|
from scrapy.link import Link
|
||||||
from scrapy.utils.python import unique as unique_list
|
from scrapy.utils.python import unique as unique_list
|
||||||
|
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||||
|
|
||||||
|
|
||||||
class HtmlParserLinkExtractor(HTMLParser):
|
class HtmlParserLinkExtractor(HTMLParser):
|
||||||
|
|
||||||
def __init__(self, tag="a", attr="href", process=None, unique=False):
|
def __init__(self, tag="a", attr="href", process=None, unique=False):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
|
|
||||||
|
warnings.warn(
|
||||||
|
"HtmlParserLinkExtractor is deprecated and will be removed in "
|
||||||
|
"future releases. Please use scrapy.linkextractors.LinkExtractor",
|
||||||
|
ScrapyDeprecationWarning
|
||||||
|
)
|
||||||
|
|
||||||
self.scan_tag = tag if callable(tag) else lambda t: t == tag
|
self.scan_tag = tag if callable(tag) else lambda t: t == tag
|
||||||
self.scan_attr = attr if callable(attr) else lambda a: a == attr
|
self.scan_attr = attr if callable(attr) else lambda a: a == attr
|
||||||
self.process_attr = process if callable(process) else lambda v: v
|
self.process_attr = process if callable(process) else lambda v: v
|
||||||
|
Loading…
x
Reference in New Issue
Block a user