mirror of https://github.com/scrapy/scrapy.git

ported internal scrapy.utils imports to w3lib

Pablo Hoffman committed 2011-04-19 01:33:52 -03:00
commit 18d303b5f1 (parent fcc8d73840)
22 changed files with 36 additions and 27 deletions
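The change is mechanical: each helper keeps its name and call signature, only the import path changes. A minimal sketch of the pattern (names taken from the hunks below; identical behavior of the w3lib copies is assumed):

    # before this commit: helper lived in scrapy.utils
    from scrapy.utils.url import is_url
    # after this commit: same function, imported from w3lib
    from w3lib.url import is_url

    is_url('http://scrapy.org')  # -> True with either import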

View File

@@ -1,10 +1,9 @@
 import sys
+from w3lib.url import is_url
 from scrapy import log
 from scrapy.command import ScrapyCommand
 from scrapy.conf import settings
 from scrapy.http import Request
-from scrapy.utils.url import is_url
 from scrapy.utils.conf import arglist_to_dict
 from scrapy.exceptions import UsageError
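For reference, is_url is a cheap shape check rather than a validator; a hedged example (return values assumed from the scrapy.utils.url implementation it was ported from, which recognizes only the file, http and https schemes):

    from w3lib.url import is_url

    is_url('http://scrapy.org/')   # True
    is_url('scrapy.org')           # False: no scheme
    is_url('ftp://scrapy.org/')    # False: scheme not recognized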

View File

@@ -11,10 +11,11 @@ import netrc
 from urlparse import urlparse, urljoin
 from subprocess import Popen, PIPE, check_call
+from w3lib.form import encode_multipart
 from scrapy.command import ScrapyCommand
 from scrapy.exceptions import UsageError
 from scrapy.utils.py26 import json
-from scrapy.utils.multipart import encode_multipart
 from scrapy.utils.http import basic_auth_header
 from scrapy.utils.conf import get_config, closest_scrapy_cfg

View File

@@ -1,10 +1,11 @@
 import pprint
+from w3lib.url import is_url
 from scrapy import log
 from scrapy.command import ScrapyCommand
 from scrapy.http import Request
 from scrapy.spider import BaseSpider
-from scrapy.utils.url import is_url
 from scrapy.exceptions import UsageError
 class Command(ScrapyCommand):

View File

@@ -1,9 +1,9 @@
+from w3lib.url import is_url
 from scrapy.command import ScrapyCommand
 from scrapy.http import Request
 from scrapy.item import BaseItem
 from scrapy.utils import display
 from scrapy.utils.spider import iterate_spider_output, create_spider_for_request
-from scrapy.utils.url import is_url
 from scrapy.exceptions import UsageError
 from scrapy import log

View File

@@ -4,7 +4,7 @@ HTTP basic auth downloader middleware
 See documentation in docs/topics/downloader-middleware.rst
 """
-from scrapy.utils.http import basic_auth_header
+from w3lib.http import basic_auth_header
 from scrapy.utils.python import WeakKeyCache
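basic_auth_header builds the value for an HTTP Authorization header from a username and password; a small usage sketch:

    from w3lib.http import basic_auth_header

    basic_auth_header('user', 'pass')
    # -> 'Basic dXNlcjpwYXNz'  (base64 of 'user:pass')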

View File

@@ -5,13 +5,14 @@ from os.path import join, exists
 from time import time
 import cPickle as pickle
+from w3lib.http import headers_dict_to_raw, headers_raw_to_dict
 from scrapy.xlib.pydispatch import dispatcher
 from scrapy import signals
 from scrapy.http import Headers
 from scrapy.exceptions import NotConfigured, IgnoreRequest
 from scrapy.core.downloader.responsetypes import responsetypes
 from scrapy.utils.request import request_fingerprint
-from scrapy.utils.http import headers_dict_to_raw, headers_raw_to_dict
 from scrapy.utils.httpobj import urlparse_cached
 from scrapy.utils.misc import load_object
 from scrapy.utils.project import data_path
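The httpcache middleware serializes response headers to disk; headers_dict_to_raw and headers_raw_to_dict convert between a dict and the raw wire format. A sketch of the round trip (list-valued results for the reverse direction are assumed):

    from w3lib.http import headers_dict_to_raw, headers_raw_to_dict

    raw = headers_dict_to_raw({'Content-Type': 'text/html'})
    # -> 'Content-Type: text/html'
    headers_raw_to_dict(raw)
    # -> {'Content-Type': ['text/html']}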

View File

@@ -1,6 +1,7 @@
+from w3lib.url import urljoin_rfc
 from scrapy import log
 from scrapy.http import HtmlResponse
-from scrapy.utils.url import urljoin_rfc
 from scrapy.utils.response import get_meta_refresh
 from scrapy.exceptions import IgnoreRequest
 from scrapy.conf import settings
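The redirect middleware uses urljoin_rfc to resolve relative Location headers against the request URL, following standard URL-joining rules; a hedged example:

    from w3lib.url import urljoin_rfc

    urljoin_rfc('http://example.com/a/b', 'c')       # -> 'http://example.com/a/c'
    urljoin_rfc('http://example.com/a/b', '/other')  # -> 'http://example.com/other'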

View File

@@ -12,14 +12,14 @@ from ftplib import FTP
 from shutil import copyfileobj
 from zope.interface import Interface, implements
 from twisted.internet import defer, threads
+from w3lib.url import file_uri_to_path
 from scrapy import log, signals
 from scrapy.xlib.pydispatch import dispatcher
 from scrapy.utils.ftp import ftp_makedirs_cwd
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.misc import load_object
-from scrapy.utils.url import file_uri_to_path
 from scrapy.conf import settings

View File

@@ -4,9 +4,10 @@ HTMLParser-based link extractor
 from HTMLParser import HTMLParser
+from w3lib.url import safe_url_string, urljoin_rfc
 from scrapy.link import Link
 from scrapy.utils.python import unique as unique_list
-from scrapy.utils.url import safe_url_string, urljoin_rfc
 class HtmlParserLinkExtractor(HTMLParser):

View File

@@ -3,9 +3,9 @@ This module implements the HtmlImageLinkExtractor for extracting
 image links only.
 """
+from w3lib.url import urljoin_rfc
 from scrapy.link import Link
-from scrapy.utils.url import canonicalize_url, urljoin_rfc
+from scrapy.utils.url import canonicalize_url
 from scrapy.utils.python import unicode_to_str, flatten
 from scrapy.selector.libxml2sel import XPathSelectorList, HtmlXPathSelector

View File

@@ -7,10 +7,10 @@ because it collides with the lxml library module.
 from lxml import etree
 import lxml.html
+from w3lib.url import safe_url_string, urljoin_rfc
 from scrapy.link import Link
 from scrapy.utils.python import unique as unique_list, str_to_unicode
-from scrapy.utils.url import safe_url_string, urljoin_rfc
 class LxmlLinkExtractor(object):
     def __init__(self, tag="a", attr="href", process=None, unique=False):

View File

@@ -1,7 +1,7 @@
 import re
-from scrapy.utils.url import urljoin_rfc
-from scrapy.utils.markup import remove_tags, remove_entities, replace_escape_chars
+from w3lib.url import urljoin_rfc
+from w3lib.html import remove_tags, remove_entities, replace_escape_chars
 from scrapy.link import Link
 from .sgml import SgmlLinkExtractor
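The regex link extractor uses these helpers to turn raw anchor markup into clean link text; a sketch of what each does (default arguments assumed):

    from w3lib.html import remove_tags, remove_entities, replace_escape_chars

    remove_tags(u'<a href="#">Next <b>page</b></a>')  # -> u'Next page'
    remove_entities(u'Ben &amp; Jerry')               # -> u'Ben & Jerry'
    replace_escape_chars(u'one\ntwo\tthree')          # -> u'onetwothree'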

View File

@@ -4,11 +4,13 @@ SGMLParser-based Link extractors
 import re
+from w3lib.url import safe_url_string, urljoin_rfc
 from scrapy.selector import HtmlXPathSelector
 from scrapy.link import Link
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import FixedSGMLParser, unique as unique_list, str_to_unicode
-from scrapy.utils.url import safe_url_string, urljoin_rfc, canonicalize_url, url_is_from_any_domain
+from scrapy.utils.url import canonicalize_url, url_is_from_any_domain
 from scrapy.utils.response import get_base_url
 class BaseSgmlLinkExtractor(FixedSGMLParser):

View File

@@ -1,9 +1,10 @@
 """Request Extractors"""
+from w3lib.url import safe_url_string, urljoin_rfc
 from scrapy.http import Request
 from scrapy.selector import HtmlXPathSelector
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import FixedSGMLParser, str_to_unicode
-from scrapy.utils.url import safe_url_string, urljoin_rfc
 from itertools import ifilter

View File

@@ -1,5 +1,5 @@
+from w3lib.url import file_uri_to_path
 from scrapy.core.downloader.responsetypes import responsetypes
-from scrapy.utils.url import file_uri_to_path
 from scrapy.utils.decorator import defers
 class FileDownloadHandler(object):
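file_uri_to_path and its inverse path_to_file_uri map between file:// URIs and local paths; a POSIX-flavored sketch (Windows drive paths behave differently):

    from w3lib.url import file_uri_to_path, path_to_file_uri

    file_uri_to_path('file:///tmp/export.csv')  # -> '/tmp/export.csv'
    path_to_file_uri('/tmp/export.csv')         # -> 'file:///tmp/export.csv'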

View File

@@ -1,5 +1,5 @@
+from w3lib.http import headers_dict_to_raw
 from scrapy.utils.datatypes import CaselessDict
-from scrapy.utils.http import headers_dict_to_raw
 class Headers(CaselessDict):

View File

@@ -7,8 +7,9 @@ See documentation in docs/topics/request-response.rst
 import copy
+from w3lib.url import safe_url_string
 from scrapy.http.headers import Headers
-from scrapy.utils.url import safe_url_string
 from scrapy.utils.trackref import object_ref
 from scrapy.utils.decorator import deprecated
 from scrapy.http.common import deprecated_setter
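Request objects pass every URL through safe_url_string, which percent-encodes characters that are unsafe in a URL (UTF-8 is assumed as the default encoding):

    from w3lib.url import safe_url_string

    safe_url_string(u'http://example.com/some page')
    # -> 'http://example.com/some%20page'
    safe_url_string(u'http://example.com/\xa3')
    # -> 'http://example.com/%C2%A3'  (UTF-8 bytes, percent-encoded)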

View File

@@ -7,6 +7,7 @@ See documentation in docs/topics/shell.rst
 import signal
 from twisted.internet import reactor, threads
+from w3lib.url import any_to_uri
 from scrapy.item import BaseItem
 from scrapy.spider import BaseSpider
@@ -14,7 +15,6 @@ from scrapy.selector import XPathSelector, XmlXPathSelector, HtmlXPathSelector
 from scrapy.utils.spider import create_spider_for_request
 from scrapy.utils.misc import load_object
 from scrapy.utils.response import open_in_browser
-from scrapy.utils.url import any_to_uri
 from scrapy.utils.console import start_python_console
 from scrapy.settings import Settings
 from scrapy.http import Request, Response, HtmlResponse, XmlResponse
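The shell accepts either a URL or a local file path; any_to_uri normalizes both to a URI (assumed behavior: strings that already look like URLs pass through unchanged, anything else is treated as a filesystem path):

    from w3lib.url import any_to_uri

    any_to_uri('http://scrapy.org/')  # already a URL: returned as-is
    any_to_uri('/tmp/page.html')      # local path: -> 'file:///tmp/page.html'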

View File

@@ -1,13 +1,13 @@
 import os, urlparse
+from cStringIO import StringIO
 from zope.interface.verify import verifyObject
 from twisted.trial import unittest
 from twisted.internet import defer
-from cStringIO import StringIO
+from w3lib.url import path_to_file_uri
 from scrapy.spider import BaseSpider
 from scrapy.contrib.feedexport import IFeedStorage, FileFeedStorage, FTPFeedStorage, S3FeedStorage, StdoutFeedStorage
-from scrapy.utils.url import path_to_file_uri
 from scrapy.utils.test import assert_aws_environ
 class FeedStorageTest(unittest.TestCase):

View File

@@ -8,6 +8,7 @@ from twisted.web import server, static, util, resource
 from twisted.web.test.test_webclient import ForeverTakingResource, \
         NoLengthResource, HostHeaderResource, \
         PayloadResource, BrokenDownloadResource
+from w3lib.url import path_to_file_uri
 from scrapy.core.downloader.webclient import PartialDownloadError
 from scrapy.core.downloader.handlers.file import FileDownloadHandler
@@ -15,7 +16,6 @@ from scrapy.core.downloader.handlers.http import HttpDownloadHandler
 from scrapy.core.downloader.handlers.s3 import S3DownloadHandler
 from scrapy.spider import BaseSpider
 from scrapy.http import Request
-from scrapy.utils.url import path_to_file_uri
 from scrapy import optional_features

View File

@@ -4,8 +4,8 @@ import re
 import hashlib
 from pkgutil import iter_modules
+from w3lib.html import remove_entities
 from scrapy.utils.python import flatten
-from scrapy.utils.markup import remove_entities
 def arg_to_iter(arg):
     """Convert an argument to an iterable. The argument can be a None, single

View File

@@ -8,9 +8,10 @@ import weakref
 from base64 import urlsafe_b64encode
 from urlparse import urlunparse
+from w3lib.http import basic_auth_header
 from scrapy.utils.url import canonicalize_url
 from scrapy.utils.httpobj import urlparse_cached
-from scrapy.utils.http import basic_auth_header
 _fingerprint_cache = weakref.WeakKeyDictionary()