# -*- coding: utf-8 -*-
import scrapy
from scrapy.crawler import CrawlerProcess

from myFirstSpider.items import MyfirstspiderItem  # project item definition


class NgaspiderSpider(scrapy.Spider):
    """Spider that scrapes quotes (text, author, tags) from lab.scrapyd.cn.

    Follows the "next" pagination link until no further page exists,
    yielding one ``MyfirstspiderItem`` per quote.
    """

    name = 'labSpider'
    start_urls = ['http://lab.scrapyd.cn/']

    def parse(self, response):
        """Parse one listing page.

        Yields:
            MyfirstspiderItem: one item per quote, with ``content``,
            ``author`` and a comma-joined ``tag`` string.
            scrapy.Request: a request for the next page, when present.
        """
        for quote in response.css('div.quote'):
            # BUGFIX: instantiate a fresh item per quote. The original
            # created one item outside the loop and mutated it on every
            # iteration; because Scrapy pipelines may keep references to
            # yielded items, all yielded items were the same object and
            # ended up holding only the last quote's data.
            item = MyfirstspiderItem()
            item['content'] = quote.css('.text::text').extract_first()
            item['author'] = quote.css('.author::text').extract_first()
            # Tags come back as a list of strings; store as one CSV string.
            item['tag'] = ','.join(quote.css('.tags .tag::text').extract())
            yield item

        # Follow pagination: the "next" link is relative, so resolve it
        # against the current response URL before requesting it.
        next_page = response.css('li.next a::attr(href)').extract_first()
        if next_page is not None:
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)