安装依赖
其他依赖请自行安装
pip install requests
pip install lxml
pip install alive_progress
代码
import os
import subprocess
import urllib
import urllib.parse

import requests
from lxml import etree
from alive_progress import alive_bar
# Author: Kenvie
# Date: 2022/08/13
# Description: download every image embedded in a telegra.ph article.

# Default base used to resolve relative image sources.
URL = 'https://telegra.ph/'

# Browser-like user agent sent with every request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
}

# Seconds to wait for a server response before giving up on a request.
REQUEST_TIMEOUT = 30

# Characters that are not allowed in Windows file names, mapped to '_'.
_UNSAFE_FILENAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})


def folder_name_from_url(url):
    """Return the folder name derived from the last path segment of *url*.

    The path is taken from the parsed URL instead of a fixed character
    offset, so the scheme, a trailing slash, a query string or surrounding
    whitespace do not end up in the folder name. Percent-escapes are decoded
    and characters that are invalid in file names are replaced with '_'.
    An URL without a path yields an empty string.
    """
    path = urllib.parse.urlparse(url.strip()).path
    slug = urllib.parse.unquote(path).strip('/').split('/')[-1]
    return slug.translate(_UNSAFE_FILENAME_CHARS)


def resolve_image_url(src, base=URL):
    """Return the absolute URL for the ``src`` attribute of an image.

    Relative sources are resolved against *base*; sources that are already
    absolute are returned unchanged.
    """
    return urllib.parse.urljoin(base, src)


def image_file_name(src, fallback='image'):
    """Return the local file name used to store the image at *src*.

    The extension found in the URL is kept; '.jpg' is used only when the URL
    has none. *fallback* is the stem used when the URL path has no file name.
    """
    path = urllib.parse.urlparse(src).path
    base_name = urllib.parse.unquote(path.rsplit('/', 1)[-1])
    stem, ext = os.path.splitext(base_name.translate(_UNSAFE_FILENAME_CHARS))
    return (stem or fallback) + (ext or '.jpg')


def main():
    """Ask for an article URL and a target folder, then download its images.

    Images are written to ``<folder>/<article name>``. A failed image
    download is reported and skipped so the remaining images are still
    fetched. On Windows the resulting folder is revealed in Explorer.

    Raises:
        requests.RequestException: if the article page cannot be fetched.
        OSError: if the target folder cannot be created or written to.
    """
    print('*********************************************************')
    print(' 爬取 telegra.ph 图片 ')
    print(' Auth:Kenvie ')
    print(' Blog: kenvie.com ')
    print('*********************************************************')
    print(' * ')
    print(' 链接格式:https://telegra.ph/********* ')
    print(' * ')
    print('*********************************************************')

    url = input("请输入超链接:").strip()
    name = folder_name_from_url(url)
    print(' ')
    folder = input("请输入存储路径:")
    save_dir = os.path.join(folder, name)

    if os.path.isdir(save_dir):
        print("目录已存在,以存入" + save_dir)
    else:
        # makedirs also creates missing parent directories.
        os.makedirs(save_dir, exist_ok=True)

    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of scraping an HTTP error page.
    response.raise_for_status()
    html = etree.HTML(response.text)
    # etree.HTML returns None when the document is empty.
    sources = [] if html is None else [str(src) for src in html.xpath('//img/@src')]

    print('存储路径为:' + save_dir)
    print('*********************************************************')
    print(' ')

    with alive_bar(len(sources)) as bar:
        for index, src in enumerate(sources, start=1):
            file_url = resolve_image_url(src, url)
            file_path = os.path.join(save_dir, image_file_name(src, f'image_{index}'))
            try:
                reply = requests.get(url=file_url, headers=headers, timeout=REQUEST_TIMEOUT)
                reply.raise_for_status()
            except requests.RequestException as exc:
                # Best effort: one broken image must not abort the whole run.
                print(f'下载失败,已跳过:{file_url} ({exc})')
            else:
                with open(file_path, 'wb') as f:
                    f.write(reply.content)
            # Advance for skipped images too so the bar reaches its total.
            bar()

    print('下载已结束!')
    print(' ')
    print('存储目录为:' + save_dir)

    if os.name == 'nt':
        # Argument list instead of a shell string: the user-supplied path is
        # never interpreted by a shell. Explorer's exit status is ignored.
        subprocess.run(['explorer', '/select,', save_dir], check=False)


if __name__ == '__main__':
    main()