# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

# twisted's adbapi module provides the asynchronous DB connection pool
from twisted.enterprise import adbapi
# Standard logging (Scrapy's own scrapy.log module is deprecated since 1.0)
import logging
# MySQL database driver
import MySQLdb
# MySQL cursor classes (DictCursor returns rows as dicts)
import MySQLdb.cursors

logger = logging.getLogger(__name__)


class MySQLPipeLine(object):
    def __init__(self):
        # Note: the other connection parameters are all strings,
        # but port must be an integer
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb', db='US_Crawler', user='root', host='10.20.5.88',
            passwd='usestudio-1', port=14062,
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8', use_unicode=True)

    def process_item(self, item, spider):
        # Standard twisted idiom: run the insert on one of the pool's
        # worker threads and attach an errback for failures
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, conn, item):
        # Call the stored procedure; conn is an adbapi transaction
        # object that exposes the usual cursor methods
        conn.execute('CALL InsertAPIData(%s,%s,%s,%s,%s,%s)',
                     (item['url'], item['title'], item['keywords'],
                      item['description'], item['content'], item['author']))
        # logger.debug("Item data in db: %s", item)

    def handle_error(self, e):
        # Error handling: log the failure
        logger.error(e)
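    # A sketch, not in the original: the adbapi pool above is never shut
    # down. Scrapy calls close_spider() on a pipeline when the spider
    # finishes, so a minimal cleanup hook could look like this:
    def close_spider(self, spider):
        # Close all connections held by the twisted connection pool
        self.dbpool.close()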
# An alternative: connect through a DBUtils connection pool, which is
# much simpler than twisted. (Left disabled in this docstring.)
"""
import MySQLdb
from DBUtils.PooledDB import PooledDB

class MySQLPipeLine(object):
    def __init__(self):
        # Pool of 5 connections shared by the pipeline
        self.pool = PooledDB(MySQLdb, 5, host='10.20.5.88', user='root',
                             passwd='usestudio-1', db='US_Crawler',
                             port=14062, charset='utf8')

    def process_item(self, item, spider):
        conn = self.pool.connection()
        cur = conn.cursor()
        # Example of calling the stored procedure with literal values:
        # cur.callproc('InsertAPIData', ('1000', 'Zhang San'))
        # Pass item fields as the stored procedure's arguments
        cur.callproc('InsertAPIData', (item['name'], item['name']))
        cur.close()
        conn.commit()
        conn.close()
        return item
"""
class ApiPipeline(object):
    # Placeholder pipeline: passes items through unchanged
    def process_item(self, item, spider):
        return item
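
# As the header comment notes, nothing above runs until the pipelines are
# registered in the project's settings.py. A minimal sketch, assuming the
# project package is named "myproject" (the name and the priority numbers
# are placeholders):
#
# ITEM_PIPELINES = {
#     'myproject.pipelines.MySQLPipeLine': 300,
#     'myproject.pipelines.ApiPipeline': 400,
# }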