mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-28 17:57:55 +00:00
issue GH #1550 - scrapy shell argument fixes: "example.com" requests "http://example.com"; "example" requests "file://example"; "./example.com" requests "file://example.com"
This commit is contained in:
parent
a41c64bfb9
commit
bc9db65358
@ -5,11 +5,13 @@ See documentation in docs/topics/shell.rst
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
import urlparse
|
||||||
from w3lib.url import any_to_uri
|
from w3lib.url import any_to_uri
|
||||||
|
|
||||||
from scrapy.commands import ScrapyCommand
|
from scrapy.commands import ScrapyCommand
|
||||||
from scrapy.shell import Shell
|
from scrapy.shell import Shell
|
||||||
from scrapy.http import Request
|
from scrapy.http import Request
|
||||||
|
from scrapy.utils.url import add_http_if_no_scheme
|
||||||
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
|
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
|
||||||
|
|
||||||
|
|
||||||
@ -43,8 +45,17 @@ class Command(ScrapyCommand):
|
|||||||
def run(self, args, opts):
|
def run(self, args, opts):
|
||||||
url = args[0] if args else None
|
url = args[0] if args else None
|
||||||
if url:
|
if url:
|
||||||
|
parts = urlparse.urlsplit(url)
|
||||||
|
if not parts.scheme:
|
||||||
|
if "." not in parts.path.split("/", 1)[0]:
|
||||||
url = any_to_uri(url)
|
url = any_to_uri(url)
|
||||||
|
|
||||||
|
for pattern in ["/", "./", "../"]:
|
||||||
|
if url.startswith(pattern):
|
||||||
|
url = any_to_uri(url)
|
||||||
|
break
|
||||||
|
url = add_http_if_no_scheme(url)
|
||||||
|
|
||||||
spider_loader = self.crawler_process.spider_loader
|
spider_loader = self.crawler_process.spider_loader
|
||||||
|
|
||||||
spidercls = DefaultSpider
|
spidercls = DefaultSpider
|
||||||
|
Loading…
x
Reference in New Issue
Block a user