''' @File : 找小说.py @Time : 2020/05/21 16:38:01 @Author : sorrowfeng @Version : 1.0 @Contact : 1399600304@qq.com @WebSite : https://sorrowfeng.github.io '''
import requests from lxml import etree import os
# Search endpoint of the site; the search keyword is appended directly to
# the end of this URL as the value of the ``searchkey`` query parameter.
search_url = "http://www.xbiquge.la/modules/article/waps.php?searchkey="
# URL of the site's "xiaoshuodaquan" page (presumably the full novel index --
# TODO confirm); it is not referenced by any code visible in this file.
all_url = "http://www.xbiquge.la/xiaoshuodaquan/"
def list_dic(list1, list2):
    '''
    Build a dictionary by pairing two sequences position by position.

    Pairing stops at the end of the shorter input. When a key occurs more
    than once, the value paired with its last occurrence is kept.

    :param list1: sequence supplying the keys
    :param list2: sequence supplying the values
    :return: dict mapping each key to the value at the same position
    '''
    return dict(zip(list1, list2))
class Spider:
    '''
    Interactive downloader that saves one novel from www.xbiquge.la as text.

    Typical use is ``Spider(word).start_requests()``: the site is searched
    for ``word``, the user picks one of the hits from a numbered menu, and
    every chapter of the chosen book is appended to ``<book>/<book>.txt``
    relative to the current working directory.
    '''

    # Search keyword; overwritten per instance in __init__.
    word = ''
    # Path of the text file most recently written by requests_data;
    # stays empty until the first chapter has been saved.
    file_name = ''

    def __init__(self, word):
        '''
        Remember the search keyword.

        :param word: book title or author name to search for
        '''
        self.word = word

    @staticmethod
    def _pause():
        '''Wait for a key press so the console stays open (Windows only).'''
        # ``pause`` is a cmd.exe built-in. On other systems the shell would
        # only report an unknown command, so the call is skipped there.
        if os.name == 'nt':
            os.system('pause')

    @staticmethod
    def _book_file(shu_name):
        '''
        Return the relative path of the text file for a book.

        :param shu_name: book title, used for both directory and file name
        :return: ``<shu_name>/<shu_name>.txt`` with the platform separator
        '''
        # A hard-coded backslash is an ordinary file-name character on
        # POSIX, which would put the file outside the book directory.
        return os.path.join(shu_name, shu_name + '.txt')

    @staticmethod
    def _pick(books, answer):
        '''
        Resolve the text typed at the menu prompt to a search hit.

        :param books: list of ``(name, url)`` pairs in menu order
        :param answer: raw user input, expected to be a 1-based menu number
        :return: the selected ``(name, url)`` pair, or ``None`` when the
            input is not a number inside the menu range
        '''
        try:
            index = int(answer.strip())
        except ValueError:
            return None
        if 1 <= index <= len(books):
            return books[index - 1]
        return None

    def start_requests(self):
        '''
        Search the site, let the user choose a hit, then download it.

        Prints a numbered menu of the search results and keeps asking until
        a valid number is entered. When the search yields nothing, a notice
        is printed and the method returns without downloading.
        '''
        start_url = search_url + str(self.word)
        response = requests.get(start_url)
        response.encoding = "utf-8"
        myhtml = etree.HTML(response.text)
        # Anchor inside the first class="even" cell of each result-table row.
        link_xpath = ("//div[@id='content']/form/table[@class='grid']"
                      "/tr/td[@class='even'][1]/a")
        name_list = myhtml.xpath(link_xpath + "/text()")
        url_list = myhtml.xpath(link_xpath + "/@href")

        # Keep the hits as ordered pairs instead of a name -> url dict so
        # that books sharing a title are all listed rather than overwriting
        # one another.
        books = list(zip(name_list, url_list))

        if not books:
            print("没有找到此书")
            self._pause()
            return

        print('\n')
        for num, (name, _url) in enumerate(books, start=1):
            print(str(num) + '. ' + name)

        # Re-prompt on anything that is not a listed number instead of
        # crashing with a lookup error.
        while True:
            book_num = input('\n你想下载的是(请输入序号):')
            chosen = self._pick(books, book_num)
            if chosen is not None:
                break
            print('序号无效,请重新输入')
        print('\n\n')
        shu_name, shu_url = chosen
        print('正在为你下载:' + shu_name + '\n')

        if not os.path.exists(shu_name):
            os.mkdir(shu_name)

        self.requests_zhang(shu_name, shu_url)

    def requests_zhang(self, shu_name, shu_url):
        '''
        Download every chapter linked from a book's chapter-list page.

        :param shu_name: book title; also the name of the output directory
        :param shu_url: URL of the page that lists the book's chapters
        '''
        response = requests.get(shu_url)
        response.encoding = 'utf-8'
        html = etree.HTML(response.text)
        zhang_name_list = html.xpath('//div[@id="list"]/dl/dd/a/text()')
        zhang_url_list = html.xpath('//div[@id="list"]/dl/dd/a/@href')
        for zhang_name, zhang_url in zip(zhang_name_list, zhang_url_list):
            self.requests_data(zhang_name, zhang_url, shu_name)
        print('\n下载完成!存放于'
              + os.path.join(os.getcwd(), self.file_name) + '\n')
        print('正在准备退出')
        self._pause()

    def requests_data(self, zhang_name, zhang_url, shu_name):
        '''
        Fetch one chapter and append it to the book's text file.

        The directory named ``shu_name`` must already exist.

        :param zhang_name: chapter title, written as a heading
        :param zhang_url: chapter href; it is appended to the site root, so
            a site-relative path is expected
        :param shu_name: book title, used to locate the output file
        '''
        data_url = 'http://www.xbiquge.la' + zhang_url
        response = requests.get(data_url)
        response.encoding = 'utf-8'
        html = etree.HTML(response.text)

        content = "\n".join(html.xpath('//div[@id="content"]/text()'))
        self.file_name = self._book_file(shu_name)
        print("正在下载:" + zhang_name)
        # Append mode: chapters are written one after another into one file.
        with open(self.file_name, "a", encoding='utf-8') as f:
            f.write('\n\n\n' + str(zhang_name) + '\n\n\n')
            f.write(content)
if __name__ == '__main__':
    # Script entry point: ask for a title or author, then hand the keyword
    # to a Spider, which runs the interactive search-and-download flow.
    keyword = input('请输入要下载的书名(或作者):')
    Spider(keyword).start_requests()
|