百木园-与人分享,
就是让自己快乐。

快看,这是我为你准备的Python爬取图片教程

爬取图片实例

在这里插入图片描述

•selenium+win32爬取图片

Python学习交流Q群:903971231
"""Crawl wallpaper images (爬取图片)."""
import os
import sys
import threading
import time
from ctypes import windll

import requests
import win32api
import win32clipboard
import win32con
from bs4 import BeautifulSoup
from PySide2 import QtWidgets
from PySide2.QtCore import QFile, Qt, QDateTime, QDate, QTime, QTimer, QStringListModel, QModelIndex
from PySide2.QtGui import QPixmap, QColor, QStandardItemModel, QStandardItem
from PySide2.QtUiTools import QUiLoader
from PySide2.QtWidgets import QApplication, QTreeView, QTreeWidget, QHeaderView, QTreeWidgetItem, QWidget
from requests_html import HTMLSession, HTML
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

class Test:
    """Desktop front end that crawls wallpapers and shows them as a slideshow.

    The widget tree is loaded from ``UI.ui``. A Chrome instance driven by
    selenium opens each wallpaper page, and the picture is stored through
    Chrome's own "save image as" dialog, which is operated with synthetic
    mouse/keyboard events (win32). For that reason the browser window must
    be visible and located where the hard-coded screen coordinates expect it.

    NOTE(review): ``show_pic`` and ``get_img`` run on worker threads and touch
    Qt widgets directly. Qt documents widgets as GUI-thread-only, so this
    should eventually be moved to signals or a ``QTimer``.
    """

    # Folder (relative to the working directory) holding downloaded pictures.
    IMG_DIR = '图片'
    # Cached copy of the first listing page.
    CACHE_FILE = '爬取图片.html'
    BASE_URL = 'http://www.netbian.com'
    # Screen position that is right-clicked to open the image context menu.
    IMAGE_POS = (500, 700)
    # Screen position of the file-name box inside the "save as" dialog.
    FILENAME_BOX_POS = (274, 449)
    # Characters Windows does not accept in a file name are mapped to "_".
    _FORBIDDEN = str.maketrans('\\/:*?"<>|', '_' * 9)

    def __init__(self):
        """Load the UI, start the browser, fetch the listing, start the slideshow.

        Raises:
            OSError: if ``UI.ui`` cannot be opened.
        """
        super().__init__()
        ui_file = QFile('UI.ui')
        if not ui_file.open(QFile.ReadOnly):
            raise OSError('cannot open UI.ui: ' + ui_file.errorString())
        try:
            # The loader reads from the device, so it must still be open here.
            self.ui = QUiLoader().load(ui_file)
        finally:
            ui_file.close()

        self.ui.B_start.clicked.connect(self.start)
        self.ui.B_left.clicked.connect(lambda: self.change_index('left'))
        self.ui.B_right.clicked.connect(lambda: self.change_index('right'))

        # File names of the pictures currently available for display.
        self.img_list = []
        # Scale the pixmap to the label size.
        self.ui.label.setScaledContents(True)
        # Index into img_list of the picture being displayed.
        self.index = 0

        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
        }
        # HTML of the listing page currently being processed.
        self.text = ''

        # A visible (non-headless) browser is required: the download is done
        # by clicking on the real window, see _save_via_context_menu.
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 30)
        self.session = requests.Session()

        self.pull()
        self.start_show_pic()

    @staticmethod
    def _wrap_index(index, count):
        """Return *index* wrapped into ``range(count)``; 0 when *count* is not positive."""
        return index % count if count > 0 else 0

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters that Windows forbids in file names replaced by ``_``."""
        return title.translate(Test._FORBIDDEN)

    def _list_images(self):
        """Return the sorted file names found directly inside ``IMG_DIR``."""
        top_level = next(os.walk(self.IMG_DIR), None)
        return sorted(top_level[2]) if top_level else []

    def _show_current(self):
        """Display the picture at ``self.index``; no-op when there are no pictures."""
        names = self.img_list
        if not names:
            return
        # The list may have changed since the index was computed.
        self.index = self._wrap_index(self.index, len(names))
        self.ui.label.setPixmap(QPixmap(self.IMG_DIR + '/' + names[self.index]))

    def _log(self, message):
        """Append *message* to the list widget and select the new row."""
        self.ui.listWidget.addItem(message)
        self.ui.listWidget.setCurrentRow(self.ui.listWidget.count() - 1)

    def change_index(self, button):
        """Step the displayed picture backwards (``'left'``) or forwards (anything else).

        The index wraps around at both ends instead of running out of range.
        """
        if not self.img_list:
            return
        step = -1 if button == 'left' else 1
        self.index = self._wrap_index(self.index + step, len(self.img_list))
        self._show_current()

    def start_show_pic(self):
        """Run the slideshow loop on a daemon thread."""
        threading.Thread(target=self.show_pic, daemon=True).start()

    def show_pic(self):
        """Endlessly rescan the picture folder and advance the slideshow every 3 s."""
        while True:
            self.img_list = self._list_images()
            if self.img_list:
                self._show_current()
                time.sleep(3)
                self.index = self._wrap_index(self.index + 1, len(self.img_list))
            else:
                # Nothing to show yet: wait instead of spinning at full speed.
                time.sleep(1)

    def start(self):
        """Run the crawler (``get_img``) on a daemon thread."""
        threading.Thread(target=self.get_img, daemon=True).start()

    def pull(self):
        """Load the first listing page into ``self.text``.

        The cached HTML file is used when it exists; otherwise the page is
        requested through the browser and the result is written to the cache.
        """
        if os.path.exists(self.CACHE_FILE):
            with open(self.CACHE_FILE, 'r', encoding='utf8') as f:
                self.text = f.read()
        else:
            self.browser.get('http://www.netbian.com/')
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            with open(self.CACHE_FILE, 'w', encoding='utf8') as f:
                f.write(self.text)
            print(self.text)
        self.ui.B_start.setEnabled(True)

    def get_img(self):
        """Download every wallpaper linked from the listing, following page links.

        Listing pages are processed iteratively (no recursion) and each
        ``/index...`` link is followed at most once per call.
        """
        os.makedirs(self.IMG_DIR, exist_ok=True)
        seen_pages = set()
        page_queue = []
        while True:
            listing = BeautifulSoup(self.text, 'lxml')
            anchors = listing.select('.list ul li a')
            print(anchors)
            for a in anchors:
                href = a.get('href', '')
                if href.startswith('/desk'):
                    self._download_wallpaper(self.BASE_URL + href)
                elif href.startswith('/index') and href not in seen_pages:
                    seen_pages.add(href)
                    page_queue.append(href)

            if not page_queue:
                return

            url = self.BASE_URL + page_queue.pop(0)
            print(url)
            self.browser.get(url)
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            self._log('下一页')

    def _download_wallpaper(self, url):
        """Open one wallpaper page and save its picture unless it already exists."""
        self.browser.get(url)
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.pic')))
        page = BeautifulSoup(self.browser.page_source, 'lxml')
        img = page.select_one('#main > div.endpage > div > p > a > img')
        title = img.get('title') if img is not None else None
        if not title:
            print('no image found on', url)
            return

        filename = self._safe_filename(title) + '.jpg'
        path = os.path.join(os.getcwd(), self.IMG_DIR, filename)

        # Close any extra windows the site opened and return to the main one.
        handles = self.browser.window_handles
        if len(handles) > 1:
            for extra in handles[1:]:
                self.browser.switch_to.window(extra)
                self.browser.close()
            self.browser.switch_to.window(handles[0])

        time.sleep(1)
        if os.path.exists(path):
            self._log(filename + ' 已存在,不下载')
            return

        self._save_via_context_menu(path)
        # NOTE(review): success is assumed; the file is not checked afterwards.
        self._log(filename + ' 下载完成')

    def _save_via_context_menu(self, path):
        """Drive Chrome's "save image as" dialog with synthetic input to save to *path*."""
        key_v = ord('V')

        # Right-click on the picture to open the context menu.
        windll.user32.SetCursorPos(*self.IMAGE_POS)
        win32api.mouse_event(win32con.MOUSEEVENTF_RIGHTDOWN, 0, 0, 0)
        time.sleep(0.05)
        win32api.mouse_event(win32con.MOUSEEVENTF_RIGHTUP, 0, 0, 0)
        time.sleep(1)

        # Press "v" in the context menu (presumably the save-image accelerator).
        win32api.keybd_event(key_v, 0, 0, 0)
        win32api.keybd_event(key_v, 0, win32con.KEYEVENTF_KEYUP, 0)

        # Put the target path on the clipboard; Unicode format keeps the
        # non-ASCII folder name intact, and the clipboard is always released.
        win32clipboard.OpenClipboard()
        try:
            win32clipboard.EmptyClipboard()
            win32clipboard.SetClipboardText(path, win32clipboard.CF_UNICODETEXT)
        finally:
            win32clipboard.CloseClipboard()

        # Click into the file-name box of the dialog.
        windll.user32.SetCursorPos(*self.FILENAME_BOX_POS)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTDOWN, 0, 0, 0)
        win32api.mouse_event(win32con.MOUSEEVENTF_LEFTUP, 0, 0, 0)
        time.sleep(1)

        # Ctrl+V pastes the path.
        win32api.keybd_event(win32con.VK_CONTROL, 0, 0, 0)
        win32api.keybd_event(key_v, 0, 0, 0)
        win32api.keybd_event(key_v, 0, win32con.KEYEVENTF_KEYUP, 0)
        win32api.keybd_event(win32con.VK_CONTROL, 0, win32con.KEYEVENTF_KEYUP, 0)
        time.sleep(3)

        # Enter confirms the dialog.
        win32api.keybd_event(win32con.VK_RETURN, 0, 0, 0)
        win32api.keybd_event(win32con.VK_RETURN, 0, win32con.KEYEVENTF_KEYUP, 0)
        time.sleep(2)

if __name__ == '__main__':
    app = QApplication([])
    # Use the Fusion widget style.
    app.setStyle('Fusion')
    window = Test()
    window.ui.show()
    # Hand the event loop's return code to the OS instead of discarding it.
    sys.exit(app.exec_())

 

最后

今天的分享到这里就完了,祝大家五一快乐鸭!!!

在这里插入图片描述


来源:https://www.cnblogs.com/123456feng/p/16210585.html
本站部分图文来源于网络,如有侵权请联系删除。

未经允许不得转载:百木园 » 快看,这是我为你准备的Python爬取图片教程

相关推荐

  • 暂无文章