
Scrapy: Saving Data to MongoDB

Published: 2023-12-24 20:54:34

1. Define a custom pipeline in pipelines.py


import pymongo

class MongoPipeline(object):
    def __init__(self, client, db):
        self.client = pymongo.MongoClient(client)
        self.db = self.client[db]

    # from_crawler() reads the relevant configuration from settings.py,
    # so the values can be stored on the instance and used later.
    @classmethod
    def from_crawler(cls, crawler):
        # Create an instance of this class, passing the two settings
        # (with fallback defaults if they are missing from settings.py).
        obj = cls(
            client=crawler.settings.get('MONGOCLIENT', 'localhost'),
            db=crawler.settings.get('DB', 'test')
        )
        return obj

    def process_item(self, item, spider):
        # Upsert keyed on the item's url: insert a new document when the
        # url is unseen, otherwise overwrite the existing document's fields.
        self.db['novel'].update_one(
            {'url': item['url']}, {'$set': dict(item)}, upsert=True
        )
        return item
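The key behaviour in process_item is the upsert keyed on 'url', which keeps the collection free of duplicates when pages are re-crawled. A minimal sketch of that semantics, with the 'novel' collection modelled as a plain dict so no MongoDB server is needed (the example items and URLs are made up for illustration):

```python
# Emulate update_one({'url': ...}, {'$set': dict(item)}, upsert=True)
# using a dict keyed by url in place of the MongoDB collection.
collection = {}

def upsert(item):
    # Insert when the url is new; merge the new fields when it exists.
    doc = collection.setdefault(item['url'], {})
    doc.update(item)

upsert({'url': 'http://example.com/ch1', 'title': 'Chapter 1'})
upsert({'url': 'http://example.com/ch1', 'title': 'Chapter 1 (revised)'})

print(len(collection))                              # still one document
print(collection['http://example.com/ch1']['title'])
```

Re-crawling the same url updates the stored document instead of appending a second copy, which is exactly why the pipeline filters on 'url' rather than calling insert_one.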

2. Enable the pipeline in settings.py

ITEM_PIPELINES = {
    # Lower numbers run earlier in the pipeline chain; 301 is arbitrary here.
    'NovelSpider.pipelines.MongoPipeline': 301,
}
MONGOCLIENT = 'localhost'
DB = 'novel'
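Scrapy delivers these settings to the pipeline through the from_crawler classmethod shown above. A sketch of that wiring, with Scrapy's Crawler and Settings objects replaced by minimal stand-in classes (FakeSettings and FakeCrawler are hypothetical names for this illustration; only the settings.get(name, default) lookup is modelled):

```python
# Stand-ins for Scrapy's Settings and Crawler, modelling only the
# settings.get(name, default) lookup that from_crawler relies on.
class FakeSettings:
    def __init__(self, values):
        self._values = values

    def get(self, name, default=None):
        return self._values.get(name, default)

class FakeCrawler:
    def __init__(self, values):
        self.settings = FakeSettings(values)

class MongoPipeline:
    def __init__(self, client, db):
        # The real pipeline opens a pymongo.MongoClient here;
        # the raw values are kept so the wiring is visible.
        self.client = client
        self.db = db

    @classmethod
    def from_crawler(cls, crawler):
        # Values come from settings.py; missing keys fall back to defaults.
        return cls(
            client=crawler.settings.get('MONGOCLIENT', 'localhost'),
            db=crawler.settings.get('DB', 'test'),
        )

crawler = FakeCrawler({'MONGOCLIENT': 'localhost', 'DB': 'novel'})
pipeline = MongoPipeline.from_crawler(crawler)
print(pipeline.db)      # 'novel', taken from the settings
```

If DB were absent from settings.py, the pipeline would fall back to the 'test' database, which is why the defaults in from_crawler are worth choosing deliberately.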


