diff --git a/docs/topics/link-extractors.rst b/docs/topics/link-extractors.rst
index edb047c86..f9b25ae63 100644
--- a/docs/topics/link-extractors.rst
+++ b/docs/topics/link-extractors.rst
@@ -78,8 +78,8 @@ LxmlLinkExtractor
     :param deny_extensions: a single value or list of strings containing
         extensions that should be ignored when extracting links.
         If not given, it will default to the
-        ``IGNORED_EXTENSIONS`` list defined in the `scrapy.linkextractor`_
-        module.
+        ``IGNORED_EXTENSIONS`` list defined in the
+        `scrapy.linkextractors`_ module.
     :type deny_extensions: list
 
     :param restrict_xpaths: is an XPath (or list of XPath's) which defines
@@ -132,4 +132,4 @@ LxmlLinkExtractor
     :type process_value: callable
 
 
-.. _scrapy.linkextractor: https://github.com/scrapy/scrapy/blob/master/scrapy/linkextractor.py
+.. _scrapy.linkextractors: https://github.com/scrapy/scrapy/blob/master/scrapy/linkextractors/__init__.py
diff --git a/scrapy/linkextractor.py b/scrapy/linkextractors/__init__.py
similarity index 94%
rename from scrapy/linkextractor.py
rename to scrapy/linkextractors/__init__.py
index 2a4d18877..8567dbb76 100644
--- a/scrapy/linkextractor.py
+++ b/scrapy/linkextractors/__init__.py
@@ -1,6 +1,9 @@
 """
-Common code and definitions used by Link extractors (located in
-scrapy.linkextractors).
+scrapy.linkextractors
+
+This package contains a collection of Link Extractors.
+
+For more info see docs/topics/link-extractors.rst
 """
 import re
 from six.moves.urllib.parse import urlparse
@@ -98,3 +101,6 @@ class FilteringLinkExtractor(object):
 
     def _extract_links(self, *args, **kwargs):
         return self.link_extractor._extract_links(*args, **kwargs)
+
+# Top-level imports
+from .lxmlhtml import LxmlLinkExtractor as LinkExtractor
diff --git a/scrapy/linkextractors/lxmlhtml.py b/scrapy/linkextractors/lxmlhtml.py
index 1ff8e4d36..1c31a15b5 100644
--- a/scrapy/linkextractors/lxmlhtml.py
+++ b/scrapy/linkextractors/lxmlhtml.py
@@ -11,7 +11,7 @@ from scrapy.selector import Selector
 from scrapy.link import Link
 
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import unique as unique_list, str_to_unicode
-from scrapy.linkextractor import FilteringLinkExtractor
+from scrapy.linkextractors import FilteringLinkExtractor
 from scrapy.utils.response import get_base_url
 
diff --git a/scrapy/linkextractors/sgml.py b/scrapy/linkextractors/sgml.py
index 88d2d5b91..bae4ad5c0 100644
--- a/scrapy/linkextractors/sgml.py
+++ b/scrapy/linkextractors/sgml.py
@@ -8,7 +8,7 @@ from sgmllib import SGMLParser
 from w3lib.url import safe_url_string
 from scrapy.selector import Selector
 from scrapy.link import Link
-from scrapy.linkextractor import FilteringLinkExtractor
+from scrapy.linkextractors import FilteringLinkExtractor
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import unique as unique_list, str_to_unicode
 from scrapy.utils.response import get_base_url
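Reviewer note: a minimal usage sketch of the renamed package, assuming the
top-level alias added in scrapy/linkextractors/__init__.py (LxmlLinkExtractor
re-exported as LinkExtractor). The callback below is hypothetical and only
illustrates the new import path and the deny_extensions parameter documented
above, not part of this patch:

    from scrapy.linkextractors import LinkExtractor

    # The default LinkExtractor is now imported from the scrapy.linkextractors
    # package (it is LxmlLinkExtractor underneath). Passing deny_extensions
    # overrides the IGNORED_EXTENSIONS default described in the docs hunk.
    link_extractor = LinkExtractor(deny_extensions=['pdf', 'zip'])

    def parse(response):
        # Extract links from a downloaded response, skipping URLs whose
        # extension is in deny_extensions.
        for link in link_extractor.extract_links(response):
            print(link.url, link.text)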