pipelines.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # -*- coding: utf-8 -*-
  2. # Define your item pipelines here
  3. #
  4. # Don't forget to add your pipeline to the ITEM_PIPELINES setting
  5. # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
# Import Twisted's asynchronous database API (adbapi)
  7. from twisted.enterprise import adbapi
# Import Scrapy's logging module
  9. from scrapy import log
# Import the MySQL database driver
  11. import MySQLdb
# Import the MySQLdb cursor classes
  13. import MySQLdb.cursors
  14. class MySQLPipeLine(object):
  15. def __init__(self):
  16. #其他参数均为字符串,port居然用整形。
  17. self.dbpool = adbapi.ConnectionPool('MySQLdb', db='US_Crawler',user='root',host='10.20.5.88', passwd='usestudio-1',port=14062, cursorclass=MySQLdb.cursors.DictCursor,charset='utf8', use_unicode=True)
  18. def process_item(self,item,spider):
  19. #twisted架构固定写法
  20. query=self.dbpool.runInteraction(self._conditional_insert,item)
  21. query.addErrback(self.handle_error)
  22. return item
  23. def _conditional_insert(self,conn,item):
  24. #调用存储过程方法
  25. conn.execute('CALL InsertAPIData(%s,%s,%s,%s,%s,%s)', (item['url'],item['title'],item['keywords'],item['description'],item['content'],item['author']))
  26. #log.msg("Item data in db:%s" %item,level=log.DEBUG)
  27. def handle_error(self,e):
  28. #错误处理
  29. log.err(e)
# Alternative: connecting to the database with DBUtils, which is much simpler than using Twisted.
"""
import MySQLdb
from DBUtils.PooledDB import PooledDB
class MySQLPipeLine(object):
    def __init__(self):
        self.pool = PooledDB(MySQLdb,5,host='10.20.5.88',user='root',passwd='usestudio-1',db='US_Crawler',port=14062,charset="utf8")
    def process_item(self, item, spider):
        conn = self.pool.connection()
        cur = conn.cursor()
        # Example of calling a stored procedure
        #cur.callproc('InsertAPIData', ('1000','张三'))
        # Passing parameters to the stored procedure
        cur.callproc('InsertAPIData', (item['name'],item['name']))
        cur.close()
        conn.commit()
        conn.close()
"""
  48. class ApiPipeline(object):
  49. def process_item(self, item, spider):
  50. return item