功能：设置起始 URL 和爬取范围（allowed_domains），设置要提取数据的 XPath 路径，返回 item 或下一个 URL 地址。
# -*- coding: utf-8 -*-
import scrapy
from Tenxun.items import TenxunItemclass TencentSpider(scrapy.Spider):name = 'tencent'allowed_domains = ['tencent.com']start_urls = ['http://hr.tencent.com/position.php?&start=0']def parse(self, response):nodelist = response.xpath(