mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-27 08:04:10 +00:00
lxml form request cleanup. #111
* remove unused _nons function copied from lxml.html
* compute clickables only if dont_click is False
* reduce branch nesting in the _get_clickable function
This commit is contained in:
parent
e4d22cb16a
commit
32b9f788be
@ -12,8 +12,6 @@ from lxml import html
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.utils.python import unicode_to_str
|
||||
|
||||
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
|
||||
|
||||
|
||||
class MultipleElementsFound(Exception):
|
||||
pass
|
||||
@ -53,13 +51,6 @@ class FormRequest(Request):
|
||||
url = form.action or form.base_url
|
||||
return cls(url, method=form.method, formdata=formdata, **kwargs)
|
||||
|
||||
# Copied from lxml.html to avoid relying on a non-public function
|
||||
def _nons(tag):
|
||||
if isinstance(tag, basestring):
|
||||
if tag[0] == '{' and tag[1:len(XHTML_NAMESPACE)+1] == XHTML_NAMESPACE:
|
||||
return tag.split('}')[-1]
|
||||
return tag
|
||||
|
||||
def _urlencode(seq, enc):
|
||||
values = [(unicode_to_str(k, enc), unicode_to_str(v, enc))
|
||||
for k, vs in seq
|
||||
@ -92,13 +83,12 @@ def _get_form(hxs, formname, formnumber, response):
|
||||
|
||||
def _get_inputs(form, formdata, dont_click, clickdata, response):
|
||||
inputs = [(n, v) for n, v in form.form_values() if n not in formdata]
|
||||
clickables = [el for el in form.inputs if el.type == 'submit']
|
||||
|
||||
# If we are allowed to click on buttons and we have clickable
|
||||
# elements, we move on to see if we have any clickdata
|
||||
if not dont_click and clickables:
|
||||
clickable = _get_clickable(clickdata, clickables, form)
|
||||
inputs.append(clickable)
|
||||
if not dont_click:
|
||||
clickables = [el for el in form.inputs if el.type == 'submit']
|
||||
if clickables:
|
||||
clickable = _get_clickable(clickdata, clickables, form)
|
||||
inputs.append(clickable)
|
||||
|
||||
inputs.extend(formdata.iteritems())
|
||||
return inputs
|
||||
@ -109,42 +99,36 @@ def _get_clickable(clickdata, clickables, form):
|
||||
if the latter is given. If not, it returns the first
|
||||
clickable element found
|
||||
"""
|
||||
# If clickdata is given, we compare it to the clickable elements
|
||||
# to find a match
|
||||
if clickdata is not None:
|
||||
# We first look to see if the number is specified in
|
||||
# clickdata, because that uniquely identifies the element
|
||||
nr = clickdata.get('nr', None)
|
||||
if nr is not None:
|
||||
try:
|
||||
el = list(form.inputs)[nr]
|
||||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
return (el.name, el.value)
|
||||
|
||||
# We didn't find it, so now we build an XPath expression
|
||||
# out of the other arguments, because they can be used
|
||||
# as such
|
||||
else:
|
||||
xpath_pred = []
|
||||
for k, v in clickdata.items():
|
||||
if k == 'coord':
|
||||
v = ','.join(str(c) for c in v)
|
||||
xpath_pred.append('[@%s="%s"]' % (k, v))
|
||||
|
||||
xpath_expr = '//*%s' % ''.join(xpath_pred)
|
||||
el = form.xpath(xpath_expr)
|
||||
if len(el) > 1:
|
||||
raise MultipleElementsFound(
|
||||
"Multiple elements found (%r) matching the criteria"
|
||||
" in clickdata: %r" % (el, clickdata)
|
||||
)
|
||||
else:
|
||||
return (el[0].name, el[0].value)
|
||||
|
||||
# If we don't have clickdata, we just use the first
|
||||
# clickable element
|
||||
else:
|
||||
# If we don't have clickdata, we just use the first clickable element
|
||||
if clickdata is None:
|
||||
el = clickables.pop(0)
|
||||
return (el.name, el.value)
|
||||
|
||||
# If clickdata is given, we compare it to the clickable elements to find a
|
||||
# match. We first look to see if the number is specified in clickdata,
|
||||
# because that uniquely identifies the element
|
||||
nr = clickdata.get('nr', None)
|
||||
if nr is not None:
|
||||
try:
|
||||
el = list(form.inputs)[nr]
|
||||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
return (el.name, el.value)
|
||||
|
||||
# We didn't find it, so now we build an XPath expression out of the other
|
||||
# arguments, because they can be used as such
|
||||
xpath_pred = []
|
||||
for k, v in clickdata.items():
|
||||
if k == 'coord':
|
||||
v = ','.join(str(c) for c in v)
|
||||
xpath_pred.append('[@%s="%s"]' % (k, v))
|
||||
|
||||
xpath_expr = '//*%s' % ''.join(xpath_pred)
|
||||
el = form.xpath(xpath_expr)
|
||||
if len(el) > 1:
|
||||
raise MultipleElementsFound("Multiple elements found (%r) "
|
||||
"matching the criteria in clickdata: %r"
|
||||
% (el, clickdata))
|
||||
else:
|
||||
return (el[0].name, el[0].value)
|
||||
|
Loading…
x
Reference in New Issue
Block a user