mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 05:24:31 +00:00
added warning of spider naming in new tutorial
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40806
This commit is contained in:
parent
3b012d3981
commit
f27d07c1a4
@ -57,8 +57,9 @@ URLs to follow.
|
||||
They are the heart of a Scrapy project and where most of the action takes
|
||||
place.
|
||||
|
||||
To create our first spider, save this code in a file named dmoz.py inside
|
||||
*dmoz/spiders* folder::
|
||||
To create our first spider, save this code in a file named ``dmoz_spider.py``
|
||||
inside the ``dmoz/spiders`` folder::
|
||||
|
||||
|
||||
from scrapy.spider import BaseSpider
|
||||
|
||||
@ -76,6 +77,11 @@ To create our first spider, save this code in a file named dmoz.py inside
|
||||
|
||||
SPIDER = OpenDirectorySpider()
|
||||
|
||||
.. warning::
|
||||
|
||||
When creating spiders, be sure not to give them the same name as the project,
|
||||
or you won't be able to import modules from your project in your spider!
|
||||
|
||||
The first line imports the class BaseSpider. For the purpose of creating a
|
||||
working spider, you must subclass BaseSpider, and then define the three main,
|
||||
mandatory, attributes:
|
||||
@ -316,9 +322,10 @@ so to actually return the data we've scraped so far, the code for our spider
|
||||
should be like this::
|
||||
|
||||
from scrapy.spider import BaseSpider
|
||||
from scrapy.item import ScrapedItem
|
||||
from scrapy.xpath.selector import HtmlXPathSelector
|
||||
|
||||
from dmoz.items import DmozItem
|
||||
|
||||
|
||||
class OpenDirectorySpider(BaseSpider):
|
||||
domain_name = "dmoz.org"
|
||||
@ -332,7 +339,7 @@ should be like this::
|
||||
sites = hxs.x('//ul[2]/li')
|
||||
items = []
|
||||
for site in sites:
|
||||
item = ScrapedItem()
|
||||
item = DmozItem()
|
||||
item.title = site.x('a/text()').extract()
|
||||
item.link = site.x('a/@href').extract()
|
||||
item.desc = site.x('text()').extract()
|
||||
|
Loading…
x
Reference in New Issue
Block a user