mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 17:44:02 +00:00
Fixed issue #1562: scrapy.http.FormRequest.from_response picked an incorrect URL when the response contains a <base> tag.
This commit is contained in:
parent
dd9f777ba7
commit
c34dbe955d
@ -11,6 +11,7 @@ from parsel.selector import create_root_node
|
|||||||
import six
|
import six
|
||||||
from scrapy.http.request import Request
|
from scrapy.http.request import Request
|
||||||
from scrapy.utils.python import to_bytes, is_listlike
|
from scrapy.utils.python import to_bytes, is_listlike
|
||||||
|
from scrapy.utils.response import get_base_url
|
||||||
|
|
||||||
|
|
||||||
class FormRequest(Request):
|
class FormRequest(Request):
|
||||||
@ -44,7 +45,7 @@ class FormRequest(Request):
|
|||||||
|
|
||||||
def _get_form_url(form, url):
|
def _get_form_url(form, url):
|
||||||
if url is None:
|
if url is None:
|
||||||
return form.action or form.base_url
|
return urljoin(form.base_url, form.action)
|
||||||
return urljoin(form.base_url, url)
|
return urljoin(form.base_url, url)
|
||||||
|
|
||||||
|
|
||||||
@ -58,7 +59,7 @@ def _urlencode(seq, enc):
|
|||||||
def _get_form(response, formname, formid, formnumber, formxpath):
|
def _get_form(response, formname, formid, formnumber, formxpath):
|
||||||
"""Find the form element """
|
"""Find the form element """
|
||||||
text = response.body_as_unicode()
|
text = response.body_as_unicode()
|
||||||
root = create_root_node(text, lxml.html.HTMLParser, base_url=response.url)
|
root = create_root_node(text, lxml.html.HTMLParser, base_url=get_base_url(response))
|
||||||
forms = root.xpath('//form')
|
forms = root.xpath('//form')
|
||||||
if not forms:
|
if not forms:
|
||||||
raise ValueError("No <form> element found in %s" % response)
|
raise ValueError("No <form> element found in %s" % response)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user