lxml form request cleanup. #111

* remove unused _nons function copied from lxml.html
* compute clickables only if dont_click is False
* reduce branch nesting in the _get_clickable function
2025-02-27 08:24:22 +00:00 · 2012-04-13 10:52:39 -03:00 · 2012-04-13 10:52:39 -03:00 · 32b9f788be
commit 32b9f788be
parent e4d22cb16a
1 changed files with 36 additions and 52 deletions
--- a/scrapy/http/request/form.py
+++ b/scrapy/http/request/form.py
@@ -12,8 +12,6 @@ from lxml import html
 from scrapy.http.request import Request
 from scrapy.utils.python import unicode_to_str
 XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
 class MultipleElementsFound(Exception):
    pass
@@ -53,13 +51,6 @@ class FormRequest(Request):
        url = form.action or form.base_url
        return cls(url, method=form.method, formdata=formdata, **kwargs)
 # Copied from lxml.html to avoid relying on a non-public function
 def _nons(tag):
    if isinstance(tag, basestring):
        if tag[0] == '{' and tag[1:len(XHTML_NAMESPACE)+1] == XHTML_NAMESPACE:
            return tag.split('}')[-1]
    return tag
 def _urlencode(seq, enc):
    values = [(unicode_to_str(k, enc), unicode_to_str(v, enc))
              for k, vs in seq
@@ -92,13 +83,12 @@ def _get_form(hxs, formname, formnumber, response):
 def _get_inputs(form, formdata, dont_click, clickdata, response):
    inputs = [(n, v) for n, v in form.form_values() if n not in formdata]
-    clickables = [el for el in form.inputs if el.type == 'submit']
-    # If we are allowed to click on buttons and we have clickable
+    if not dont_click:
-    # elements, we move on to see if we have any clickdata
+        clickables = [el for el in form.inputs if el.type == 'submit']
-    if not dont_click and clickables:
+        if clickables:
-        clickable = _get_clickable(clickdata, clickables, form)
+            clickable = _get_clickable(clickdata, clickables, form)
-        inputs.append(clickable)
+            inputs.append(clickable)
    inputs.extend(formdata.iteritems())
    return inputs
@@ -109,42 +99,36 @@ def _get_clickable(clickdata, clickables, form):
    if the latter is given. If not, it returns the first
    clickable element found
    """
-    # If clickdata is given, we compare it to the clickable elements
+    # If we don't have clickdata, we just use the first clickable element
-    # to find a match
+    if clickdata is None:
    if clickdata is not None:
        # We first look to see if the number is specified in
        # clickdata, because that uniquely identifies the element
        nr = clickdata.get('nr', None)
        if nr is not None:
            try:
                el = list(form.inputs)[nr]
            except IndexError:
                pass
            else:
                return (el.name, el.value)
        # We didn't find it, so now we build an XPath expression
        # out of the other arguments, because they can be used
        # as such
        else:
            xpath_pred = []
            for k, v in clickdata.items():
                if k == 'coord':
                    v = ','.join(str(c) for c in v)
                xpath_pred.append('[@%s="%s"]' % (k, v))
            xpath_expr = '//*%s' % ''.join(xpath_pred)
            el = form.xpath(xpath_expr)
            if len(el) > 1:
                raise MultipleElementsFound(
                        "Multiple elements found (%r) matching the criteria"
                        " in clickdata: %r" % (el, clickdata)
                )
            else:
                return (el[0].name, el[0].value)
    # If we don't have clickdata, we just use the first
    # clickable element
    else:
        el = clickables.pop(0)
        return (el.name, el.value)
    # If clickdata is given, we compare it to the clickable elements to find a
    # match. We first look to see if the number is specified in clickdata,
    # because that uniquely identifies the element
    nr = clickdata.get('nr', None)
    if nr is not None:
        try:
            el = list(form.inputs)[nr]
        except IndexError:
            pass
        else:
            return (el.name, el.value)
    # We didn't find it, so now we build an XPath expression out of the other
    # arguments, because they can be used as such
    xpath_pred = []
    for k, v in clickdata.items():
        if k == 'coord':
            v = ','.join(str(c) for c in v)
        xpath_pred.append('[@%s="%s"]' % (k, v))
    xpath_expr = '//*%s' % ''.join(xpath_pred)
    el = form.xpath(xpath_expr)
    if len(el) > 1:
        raise MultipleElementsFound("Multiple elements found (%r) "
                                    "matching the criteria in clickdata: %r"
                                    % (el, clickdata))
    else:
        return (el[0].name, el[0].value)