diff --git a/scrapy/trunk/scrapy/tests/test_xpath.py b/scrapy/trunk/scrapy/tests/test_xpath.py
index 481ea4293..e58aad374 100644
--- a/scrapy/trunk/scrapy/tests/test_xpath.py
+++ b/scrapy/trunk/scrapy/tests/test_xpath.py
@@ -5,8 +5,8 @@ import unittest
 import libxml2
 
 from scrapy.http import Response
-from scrapy.xpath.selector import XPathSelector
-from scrapy.xpath.constructors import xmlDoc_from_xml
+from scrapy.xpath.selector import XPathSelector, XmlXPathSelector, HtmlXPathSelector
+#from scrapy.xpath.constructors import xmlDoc_from_xml, xmlDoc_from_html
 from scrapy.xpath.iterator import XMLNodeIterator
 
 class XPathTestCase(unittest.TestCase):
@@ -23,7 +23,7 @@ class XPathTestCase(unittest.TestCase):
         """Simple selector tests"""
         body = "<p><input name='a' value='1'/><input name='b' value='2'/></p>"
" response = Response(domain="example.com", url="http://example.com", body=body) - xpath = XPathSelector(response) + xpath = HtmlXPathSelector(response) xl = xpath.x('//input') self.assertEqual(2, len(xl)) @@ -59,7 +59,7 @@ class XPathTestCase(unittest.TestCase): """ response = Response(domain="example.com", url="http://example.com", body=body) - x = XPathSelector(response) + x = HtmlXPathSelector(response) divtwo = x.x('//div[@class="two"]') self.assertEqual(divtwo.x("//li").extract(), @@ -84,7 +84,7 @@ class XPathTestCase(unittest.TestCase): """ response = Response(domain="example.com", url="http://example.com", body=body) - x = XPathSelector(response) + x = HtmlXPathSelector(response) name_re = re.compile("Name: (\w+)") self.assertEqual(x.x("//ul/li").re(name_re), @@ -92,6 +92,20 @@ class XPathTestCase(unittest.TestCase): self.assertEqual(x.x("//ul/li").re("Age: (\d+)"), ["10", "20"]) + def test_selector_over_text(self): + hxs = HtmlXPathSelector(text='lala') + self.assertEqual(hxs.extract(), + u'lala') + + xxs = XmlXPathSelector(text='lala') + self.assertEqual(xxs.extract(), + u'lala') + + xxs = XmlXPathSelector(text='lala') + self.assertEqual(xxs.x('.').extract(), + [u'lala']) + + def test_selector_namespaces_simple(self): body = """ @@ -101,7 +115,7 @@ class XPathTestCase(unittest.TestCase): """ response = Response(domain="example.com", url="http://example.com", body=body) - x = XPathSelector(response, constructor=xmlDoc_from_xml) + x = XmlXPathSelector(response) x.register_namespace("somens", "http://scrapy.org") self.assertEqual(x.x("//somens:a").extract(), @@ -119,7 +133,7 @@ class XPathTestCase(unittest.TestCase): """ response = Response(domain="example.com", url="http://example.com", body=body) - x = XPathSelector(response, constructor=xmlDoc_from_xml) + x = XmlXPathSelector(response) x.register_namespace("xmlns", "http://webservices.amazon.com/AWSECommerceService/2005-10-05") x.register_namespace("p", "http://www.scrapy.org/product") @@ -146,10 +160,19 @@ class XPathTestCase(unittest.TestCase): headers = {'Content-Type': ['text/html; charset=utf-8']} response = Response(domain="example.com", url="http://example.com", headers=headers, body=html_utf8) - x = XPathSelector(response) + x = HtmlXPathSelector(response) self.assertEquals(x.x("//span[@id='blank']/text()").extract(), [u'\xa3']) + def test_null_bytes(self): + hxs = HtmlXPathSelector(text='la\x00la') + self.assertEqual(hxs.extract(), + u'lala') + + xxs = XmlXPathSelector(text='la\x00la') + self.assertEqual(xxs.extract(), + u'lala') + def test_iterator(self): body = """ diff --git a/scrapy/trunk/scrapy/xpath/constructors.py b/scrapy/trunk/scrapy/xpath/constructors.py index 95eedf5d8..e6ca7fdcc 100644 --- a/scrapy/trunk/scrapy/xpath/constructors.py +++ b/scrapy/trunk/scrapy/xpath/constructors.py @@ -24,5 +24,8 @@ def xmlDoc_from_html(response): def xmlDoc_from_xml(response): """Return libxml2 doc for XMLs""" - return libxml2.readDoc(response.body.to_string('utf-8'), response.url, 'utf-8', xml_parser_options) - + try: + lxdoc = libxml2.readDoc(response.body.to_string('utf-8'), response.url, 'utf-8', xml_parser_options) + except TypeError: # libxml2 doesn't parse text with null bytes + lxdoc = libxml2.readDoc(response.body.to_string('utf-8').replace("\x00", ""), response.url, 'utf-8', xml_parser_options) + return lxdoc diff --git a/scrapy/trunk/scrapy/xpath/iterator.py b/scrapy/trunk/scrapy/xpath/iterator.py index c216cad22..5d483914e 100644 --- a/scrapy/trunk/scrapy/xpath/iterator.py +++ 
@@ -4,8 +4,8 @@ from cStringIO import StringIO
 
 import libxml2
 
-from scrapy.xpath.constructors import xml_parser_options, xmlDoc_from_xml
-from scrapy.xpath.selector import XPathSelector
+from scrapy.xpath.constructors import xml_parser_options
+from scrapy.xpath.selector import XmlXPathSelector
 
 class XMLNodeIterator(object):
     """XMLNodeIterator provides a way to iterate over all nodes of the same
@@ -63,7 +63,7 @@ class XMLNodeSAXParser():
         if name == self.requested_nodename:
             self.inside_requested_node = False
             string = ''.join([self.xml_declaration, self.buffer.getvalue()])
-            selector = XPathSelector(text=string, constructor=xmlDoc_from_xml).x('/' + self.requested_nodename)[0]
+            selector = XmlXPathSelector(text=string).x('/' + self.requested_nodename)[0]
             self.selectors.append(selector)
 
     def characters(self, data):
diff --git a/scrapy/trunk/scrapy/xpath/selector.py b/scrapy/trunk/scrapy/xpath/selector.py
index 85d0d8762..70e007457 100644
--- a/scrapy/trunk/scrapy/xpath/selector.py
+++ b/scrapy/trunk/scrapy/xpath/selector.py
@@ -2,22 +2,20 @@ import libxml2
 
 from scrapy.http import Response
 from scrapy.xpath.extension import Libxml2Document
-from scrapy.xpath.constructors import xmlDoc_from_html
+from scrapy.xpath.constructors import xmlDoc_from_html, xmlDoc_from_xml
 from scrapy.utils.python import flatten
 from scrapy.utils.misc import extract_regex
 
 class XPathSelector(object):
-    """Provides an easy way for selecting document parts using XPaths and
-    regexs, it also supports nested queries.
-
-    Usage example (untested code):
-
-    x = XPathSelector(response)
-    i = ScrapedItem()
-    i.assign("name", x.x("//h2/text()"))
-    i.assign("features", x.x("//div[@class='features']).x("./span/text()")
-    """
+    """The XPathSelector class provides a convenient way for selecting document
+    parts using XPaths and regexs, with support for nested queries.
+
+    Although this is not an abstract class, you usually instantiate one of its
+    children:
+
+    - XmlXPathSelector (for XML content)
+    - HtmlXPathSelector (for HTML content)
+    """
 
     def __init__(self, response=None, text=None, node=None, parent=None, expr=None, constructor=xmlDoc_from_html):
         if parent:
@@ -36,6 +34,8 @@ class XPathSelector(object):
         self.expr = expr
 
     def x(self, xpath):
+        """Perform the given XPath query on the current XPathSelector and
+        return a XPathSelectorList of the result"""
         if hasattr(self.xmlNode, 'xpathEval'):
             self.doc.xpathContext.setContextNode(self.xmlNode)
             xpath_result = self.doc.xpathContext.xpathEval(xpath)
@@ -47,13 +47,20 @@ class XPathSelector(object):
             return XPathSelectorList([])
 
     def re(self, regex):
+        """Return a list of unicode strings by applying the regex over all
+        current XPath selections, and flattening the results"""
         return extract_regex(regex, self.extract(), 'utf-8')
 
-    def extract(self, **kwargs):
+    def extract(self):
+        """Return a unicode string of the content referenced by the XPathSelector"""
         if isinstance(self.xmlNode, basestring):
             text = unicode(self.xmlNode, 'utf-8', errors='ignore')
-        elif hasattr(self.xmlNode, 'xpathEval'):
-            if isinstance(self.xmlNode, libxml2.xmlAttr):
+        elif hasattr(self.xmlNode, 'serialize'):
+            if isinstance(self.xmlNode, libxml2.xmlDoc):
+                data = self.xmlNode.getRootElement().serialize('utf-8')
+                text = unicode(data, 'utf-8', errors='ignore') if data else u''
+            elif isinstance(self.xmlNode, libxml2.xmlAttr):
+                # serialization doesn't work sometimes for xmlAttr types
                 text = unicode(self.xmlNode.content, errors='ignore')
             else:
                 data = self.xmlNode.serialize('utf-8')
@@ -66,6 +73,7 @@ class XPathSelector(object):
         return text
 
     def register_namespace(self, prefix, uri):
+        """Register namespace so that it can be used in XPath queries"""
         self.doc.xpathContext.xpathRegisterNs(prefix, uri)
 
     def __str__(self):
@@ -75,13 +83,29 @@ class XPathSelector(object):
 
 
 class XPathSelectorList(list):
-
-    def extract(self, **kwargs):
-        return [x.extract(**kwargs) if isinstance(x, XPathSelector) else x for x in self]
+    """List of XPathSelector objects"""
 
     def x(self, xpath):
+        """Perform the given XPath query on each XPathSelector of the list and
+        return a new (flattened) XPathSelectorList of the results"""
         return XPathSelectorList(flatten([x.x(xpath) for x in self]))
 
     def re(self, regex):
+        """Perform the re() method on each XPathSelector of the list, and
+        return the result as a flattened list of unicode strings"""
         return flatten([x.re(regex) for x in self])
 
+    def extract(self):
+        """Return a list of unicode strings with the content referenced by each
+        XPathSelector of the list"""
+        return [x.extract() if isinstance(x, XPathSelector) else x for x in self]
+
+class XmlXPathSelector(XPathSelector):
+    """XPathSelector for XML content"""
+    def __init__(self, response=None, text=None):
+        XPathSelector.__init__(self, response=response, text=text, constructor=xmlDoc_from_xml)
+
+class HtmlXPathSelector(XPathSelector):
+    """XPathSelector for HTML content"""
+    def __init__(self, response=None, text=None):
+        XPathSelector.__init__(self, response=response, text=text, constructor=xmlDoc_from_html)
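
Usage sketch (not part of the patch): a minimal example of how the new XmlXPathSelector and HtmlXPathSelector classes are meant to be used. The class names, the Response arguments and the x()/re()/extract() calls come from this diff and its tests; the HTML body and the results shown in comments are illustrative, not taken verbatim from the test suite.

    from scrapy.http import Response
    from scrapy.xpath.selector import HtmlXPathSelector, XmlXPathSelector

    # HTML content: build the selector from a Response object
    body = "<ul><li>Name: John</li><li>Name: Paul</li></ul>"
    response = Response(domain="example.com", url="http://example.com", body=body)
    hxs = HtmlXPathSelector(response)
    hxs.x("//ul/li/text()").extract()   # [u'Name: John', u'Name: Paul']
    hxs.x("//ul/li").re("Name: (\w+)")  # [u'John', u'Paul']

    # XML content: selectors can also be built from raw text
    xxs = XmlXPathSelector(text='<root>lala</root>')
    xxs.x('/root/text()').extract()     # [u'lala']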