mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-28 09:23:51 +00:00
Issue GH #1550 - fixes to `scrapy shell` argument handling: "example.com" now requests "http://example.com"; "example" requests "file://example"; "./example.com" requests "file://example.com".
This commit is contained in:
parent
a41c64bfb9
commit
bc9db65358
@ -5,11 +5,13 @@ See documentation in docs/topics/shell.rst
|
||||
"""
|
||||
|
||||
from threading import Thread
|
||||
import urlparse
|
||||
from w3lib.url import any_to_uri
|
||||
|
||||
from scrapy.commands import ScrapyCommand
|
||||
from scrapy.shell import Shell
|
||||
from scrapy.http import Request
|
||||
from scrapy.utils.url import add_http_if_no_scheme
|
||||
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
|
||||
|
||||
|
||||
@ -43,7 +45,16 @@ class Command(ScrapyCommand):
|
||||
def run(self, args, opts):
|
||||
url = args[0] if args else None
|
||||
if url:
|
||||
url = any_to_uri(url)
|
||||
parts = urlparse.urlsplit(url)
|
||||
if not parts.scheme:
|
||||
if "." not in parts.path.split("/", 1)[0]:
|
||||
url = any_to_uri(url)
|
||||
|
||||
for pattern in ["/", "./", "../"]:
|
||||
if url.startswith(pattern):
|
||||
url = any_to_uri(url)
|
||||
break
|
||||
url = add_http_if_no_scheme(url)
|
||||
|
||||
spider_loader = self.crawler_process.spider_loader
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user