本文实例讲述了基于scrapy实现的简单蜘蛛采集程序。分享给大家供大家参考。具体如下:
# Standard Python library imports
# 3rd party imports
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlX
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from qianchengwuyou.items import QianchengwuyouItem
class QcwySpider(CrawlSpider):
name = 'qcwy'
# allowed_domains = ['www.xxx.com']