'''
@File    : find_img.py
@Time    : 2020/05/14 21:08:22
@Author  : sorrowfeng
@Version : 1.0
@Contact : 1399600304@qq.com
@WebSite : https://sorrowfeng.github.io
'''
import json
import os
import re
import socket
import threading
import time
import tkinter as tk
import urllib
import urllib.error
import urllib.parse
import urllib.request
def SaveImage(url, path):
    """Download the image at *url* and write the raw bytes to *path*.

    Sends browser-like headers (including a pixiv referer) so the image
    host accepts the request.  Network and file errors are printed and
    swallowed so one failed image does not abort the crawl; the function
    always returns None.
    """
    request = urllib.request.Request(url)
    # BUGFIX: the original also sent 'accept-encoding: gzip, deflate, br'.
    # urllib does not transparently decompress responses, so a server that
    # honored that header would make us write a gzip/brotli body to disk as
    # a corrupt image file.  Omit the header to always receive identity bytes.
    request.add_header('accept', 'image/webp,image/apng,*/*;q=0.8')
    request.add_header('accept-language', 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-TW;q=0.6')
    request.add_header('sec-fetch-dest', 'image')
    request.add_header('sec-fetch-mode', 'no-cors')
    request.add_header('sec-fetch-site', 'cross-site')
    request.add_header('referer', 'https://pixivic.com/popSearch')
    request.add_header('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 Edg/81.0.416.72')
    try:
        # Context managers guarantee the response and the file are closed
        # even when read()/write() raises (the original leaked on error).
        with urllib.request.urlopen(request) as response:
            img = response.read()
        with open(path, 'wb') as f:
            f.write(img)
    except urllib.error.URLError as ue:
        if hasattr(ue, 'code'):
            print(ue.code)
        if hasattr(ue, "reason"):
            print(ue.reason)
    except IOError as ie:
        print(ie)
    return
class Crawler:
    """Crawls the pixivic.com search API for illustrations matching a
    keyword and saves each image on a background thread, reporting
    progress into the owning Application's text widget."""

    # Class-level defaults; the effective values are set per-instance in
    # __init__() and start().
    __time_sleep = 0.1     # pause between scheduled downloads (seconds)
    __counter = 0          # running index used to number saved files
    __start_amount = 0     # first result page to fetch
    __amount = 0           # last result page to fetch (inclusive)
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 Edg/81.0.416.72'}

    def __init__(self, t, word, app):
        """t: seconds to sleep between downloads.
        word: search keyword, also used as the output directory name.
        app: Application instance whose text widget receives progress lines.
        """
        self.__time_sleep = t
        # BUGFIX: the original accepted `app` but never stored it and then
        # read the module-level global `app` in save_image().  Keep the
        # reference so the crawler works with any Application instance.
        self.__app = app
        if not os.path.exists("./" + word):
            os.mkdir("./" + word)
        # Continue numbering after any images already present in the folder.
        self.__counter = len(os.listdir('./' + word)) + 1

    def get_suffix(self, name):
        """Return the file extension of *name* (dot included, max 5 chars),
        falling back to '.jpeg' when there is no usable extension."""
        m = re.search(r'\.[^\.]*$', name)
        # BUGFIX: the original called m.group(0) unconditionally and raised
        # AttributeError for names containing no dot at all.
        if m is not None and m.group(0) and len(m.group(0)) <= 5:
            return m.group(0)
        return '.jpeg'

    def save_image(self, rsp_data, word):
        """Schedule one download thread per image URL in a single
        illustration record and log progress to the GUI text box."""
        for image_info in rsp_data['imageUrls']:
            try:
                # Rewrite the pixiv CDN host to a mirror that serves the
                # originals without pixiv's referer restrictions.
                pps = image_info['original'].replace(
                    'https://i.pximg.net/',
                    'https://original.img.cheerfun.dev/')
                suffix = self.get_suffix(pps)
                threading.Thread(
                    target=SaveImage,
                    args=(pps, './' + word + '/' + word + "_"
                          + str(self.__counter) + str(suffix))).start()
            except urllib.error.HTTPError as http_err:
                print(http_err)
                continue
            except Exception as e:
                time.sleep(1)
                print(e)
                print("出现未知错误, 放弃保存")
                continue
            else:
                # BUGFIX: the original named this `sum`, shadowing the
                # builtin, and updated the GUI through the global `app`.
                count = len(os.listdir('./' + word))
                self.__app.t.insert('end', f'第{self.__counter}张图片正在保存, 已有{count}张图片, 保存在{os.getcwd()}下的{word}文件夹\n')
                self.__app.t.see(tk.END)
                self.__app.t.update()
                self.__counter += 1
                time.sleep(self.__time_sleep)
        return

    def get_image(self, word=''):
        """Fetch result pages __start_amount..__amount (inclusive) for
        *word* from the pixivic API and save every illustration found."""
        search = urllib.parse.quote(word)
        pagenum = self.__start_amount
        while pagenum <= self.__amount:
            url = 'https://api.pixivic.com/illustrations?keyword=' + search + '&page=' + str(pagenum)
            # BUGFIX: bind `page` before the try so the finally clause can
            # not raise NameError when urlopen() itself fails.
            page = None
            try:
                req = urllib.request.Request(url=url, headers=self.headers)
                page = urllib.request.urlopen(req)
                rsp = page.read().decode('utf-8')
            except UnicodeDecodeError as e:
                print(e)
                print('-----UnicodeDecodeErrorurl:', url)
            except urllib.error.URLError as e:
                print(e)
                print("-----urlErrorurl:", url)
            except socket.timeout as e:
                print(e)
                print("-----socket timout:", url)
            else:
                rsp_data = json.loads(rsp)
                for ele in rsp_data['data']:
                    self.save_image(ele, word)
                print("下载下一页")
            finally:
                if page is not None:
                    page.close()
            # BUGFIX: the original advanced the page only on success, so a
            # persistently failing page was retried forever with no backoff.
            pagenum += 1
        print("下载结束")
        return

    def start(self, word, page_num, start_page):
        """Download pages start_page..page_num (inclusive) for *word*."""
        self.__start_amount = start_page
        self.__amount = page_num
        self.get_image(word)
class Application(tk.Frame):
    """Tk GUI: asks for an artist/keyword and runs the Crawler on a
    daemon thread so the window stays responsive during downloads."""

    theWord = ''  # last keyword entered by the user

    def __init__(self, master=None):
        """Build the window (title, size) and both widget groups."""
        super().__init__(master)
        master.title("找图片")
        master.geometry("500x300")
        self.pack()
        self.create_input_widget()
        self.create_output_widget()

    def create_input_widget(self, master=None):
        """Build the prompt label, the keyword entry and the two buttons.

        NOTE(review): master defaults to None, so these widgets attach to
        the default Tk root rather than this frame — kept as-is to
        preserve the original layout.
        """
        L1 = tk.Label(master, text="你要找谁的图片?")
        L1.pack()
        self.text_widget = tk.Entry(master, show=None, bd=5)
        self.text_widget.pack()
        b1 = tk.Button(master, text="开始找图片", width=15, height=2,
                       command=lambda: self.thread_it(self.start_find))
        b1.pack()
        # BUGFIX: quit via this frame's own root window instead of the
        # module-level global `root` (same window in this script, but no
        # hidden global dependency).
        b2 = tk.Button(master, text="不找了, 退出", width=15, height=2,
                       command=self.master.destroy)
        b2.pack()

    def create_output_widget(self):
        """Build the text box that shows download progress."""
        self.t = tk.Text(width=400, height=100)
        self.t.pack()

    def start_find(self):
        """Read the keyword and crawl 10 pages, pausing 1.3 s per image."""
        self.theWord = self.text_widget.get()
        # BUGFIX: hand the crawler this instance rather than the global
        # `app` (identical object in this script, but explicit).
        self.crawler = Crawler(1.3, self.theWord, self)
        self.crawler.start(self.theWord, 10, 1)

    @staticmethod
    def thread_it(func, *args):
        """Run func(*args) on a daemon thread (dies with the GUI)."""
        t = threading.Thread(target=func, args=args)
        # BUGFIX: Thread.setDaemon() is deprecated (removed in 3.13);
        # assign the attribute instead.
        t.daemon = True
        t.start()
if __name__ == "__main__":
    # Script entry point: build the Tk root window, mount the GUI and
    # hand control to the Tk event loop (blocks until the window closes).
    # NOTE(review): `root` and `app` are read as module-level globals by
    # Application/Crawler in the original code, so these names must stay.
    root = tk.Tk()
    app = Application(master=root)
    app.mainloop()