欢迎来到尧图网

客户服务 关于我们

您的位置:首页 > 新闻 > 国际 > Python PDF批量转图片工具

Python PDF批量转图片工具

2024/12/22 1:25:30 来源:https://blog.csdn.net/Clay_K/article/details/144366998  浏览:    关键词:Python PDF批量转图片工具

Python PDF批量转图片工具

1.简介:

这是一个简单易用的PDF转图片工具,主要功能:

  1. 可以批量处理多个PDF文件
  2. 可以选择需要转换的具体页面
  3. 支持两种设置方式:
    • 统一设置:所有PDF使用相同的页码
    • 单独设置:每个PDF单独设置页码

使用方法:

  1. 点击"选择文件夹",选择PDF所在文件夹
  2. 选择设置方式并输入要转换的页码
    例如:1,2,3 或 1-5 或 1,3-5
  3. 点击"开始转换"即可

转换后的图片会自动保存在原PDF所在目录下的"文件名_images"文件夹中

2.运行效果:

在这里插入图片描述

在这里插入图片描述

3.相关源码:

import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import fitz
import threading
from queue import Queue
import gc
from typing import Dict, List
from queue import Empty  # raised by Queue.get_nowait(); only Queue is imported above


class PDFConverter:
    """Tkinter GUI that exports selected pages of every PDF in a folder as PNG files.

    Page selections are kept per PDF as 1-based page numbers. They can be
    entered once for all files ("batch" mode) or per file through entry
    widgets overlaid on the file list ("individual" mode). Conversion runs
    on a background thread; every widget access made on behalf of that
    thread is posted to ``ui_queue`` and executed on the Tk thread.
    """

    # Zoom factor applied to both axes when a page is rasterised.
    RENDER_ZOOM = 2
    # PyMuPDF documents that it does not support multi-threaded use, so all
    # rendering is funnelled through a single worker thread.
    MAX_WORKERS = 1
    # Interval, in milliseconds, between polls of the UI callback queue.
    UI_POLL_MS = 50

    def __init__(self):
        """Create the main window, the shared state and the UI poll loop."""
        self.root = tk.Tk()
        self.root.title("PDF批量转图片工具")
        self.root.geometry("900x700")

        # Full PDF path -> 1-based page numbers selected for export.
        self.pdf_settings: Dict[str, List[int]] = {}
        # Treeview item id -> full path of the PDF shown in that row.
        self.item_paths: Dict[str, str] = {}
        # Pending (pdf_path, pages) jobs consumed by the worker thread(s).
        self.work_queue = Queue()
        # (callable, args) pairs posted by background threads for the Tk thread.
        self.ui_queue = Queue()
        self.ui_thread = threading.current_thread()

        # Progress bookkeeping; reset at the start of every conversion run.
        self.lock = threading.Lock()
        self.completed_files = 0
        self.total_files = 0
        self.converting = False

        self.setup_ui()
        self.root.after(self.UI_POLL_MS, self._drain_ui_queue)

    def setup_ui(self):
        """Build all widgets of the main window."""
        # Folder selection row.
        folder_frame = ttk.Frame(self.root)
        folder_frame.pack(fill=tk.X, padx=5, pady=5)
        self.folder_path = tk.StringVar()
        ttk.Entry(folder_frame, textvariable=self.folder_path).pack(
            side=tk.LEFT, fill=tk.X, expand=True)
        ttk.Button(folder_frame, text="选择文件夹",
                   command=self.select_folder).pack(side=tk.RIGHT)

        # Help button.
        help_frame = ttk.Frame(self.root)
        help_frame.pack(fill=tk.X, padx=5)
        ttk.Button(help_frame, text="使用帮助",
                   command=self.show_help).pack(side=tk.RIGHT)

        # Setting-mode selection (batch vs. individual).
        self.mode_frame = ttk.LabelFrame(self.root, text="设置模式")
        self.mode_frame.pack(fill=tk.X, padx=5, pady=5)
        self.setting_mode = tk.StringVar(value="batch")
        ttk.Radiobutton(self.mode_frame, text="统一设置",
                        variable=self.setting_mode, value="batch",
                        command=self.toggle_setting_mode).pack(side=tk.LEFT, padx=10)
        ttk.Radiobutton(self.mode_frame, text="单独设置",
                        variable=self.setting_mode, value="individual",
                        command=self.toggle_setting_mode).pack(side=tk.LEFT, padx=10)

        # Batch page-selection frame.
        self.batch_setting_frame = ttk.LabelFrame(self.root, text="统一页码设置")
        self.batch_setting_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(self.batch_setting_frame,
                  text="页码格式(例如: 1,2,3-5):").pack(side=tk.LEFT)
        self.batch_pages = ttk.Entry(self.batch_setting_frame)
        self.batch_pages.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
        ttk.Button(self.batch_setting_frame, text="应用",
                   command=self.apply_batch_settings).pack(side=tk.RIGHT)

        # Control row: start button, progress bar, memory-cleanup button.
        control_frame = ttk.Frame(self.root)
        control_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Button(control_frame, text="开始转换",
                   command=self.start_conversion).pack(side=tk.LEFT, padx=5)
        self.progress = ttk.Progressbar(control_frame, mode='determinate')
        self.progress.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
        ttk.Button(control_frame, text="清理内存",
                   command=self.clean_memory).pack(side=tk.RIGHT, padx=5)

        # Horizontal split: file list on the left, log on the right.
        main_pane = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        main_pane.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)

        left_frame = ttk.Frame(main_pane)
        main_pane.add(left_frame, weight=2)

        list_frame = ttk.LabelFrame(left_frame, text="PDF文件列表")
        list_frame.pack(fill=tk.BOTH, expand=True)

        columns = ("文件名", "页码设置")
        self.tree = ttk.Treeview(list_frame, columns=columns, show="headings")
        self.tree.heading("文件名", text="文件名")
        self.tree.heading("页码设置", text="页码设置")
        self.tree.column("文件名", width=400)
        self.tree.column("页码设置", width=200)

        # Treeview item id -> Entry widget overlaid on its page column.
        self.page_entries = {}

        scrollbar = ttk.Scrollbar(list_frame, orient=tk.VERTICAL,
                                  command=self.tree.yview)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.tree.configure(yscrollcommand=scrollbar.set)
        self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

        # Processing-log panel.
        right_frame = ttk.LabelFrame(main_pane, text="处理日志")
        main_pane.add(right_frame, weight=1)

        self.log_text = tk.Text(right_frame, wrap=tk.WORD, width=40)
        log_scrollbar = ttk.Scrollbar(right_frame, command=self.log_text.yview)
        self.log_text.configure(yscrollcommand=log_scrollbar.set)
        self.log_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Bring the widgets in line with the initial mode.
        self.toggle_setting_mode()

    def toggle_setting_mode(self):
        """Show the widgets that belong to the currently selected mode."""
        if self.setting_mode.get() == "batch":
            self.batch_setting_frame.pack(after=self.mode_frame, fill=tk.X,
                                          padx=5, pady=5)
            # Per-file entries are only meaningful in individual mode.
            for entry in self.page_entries.values():
                entry.place_forget()
        else:
            self.batch_setting_frame.pack_forget()
            self.update_page_entries()

    def load_pdf_files(self):
        """Replace the file list with the PDFs found in the selected folder."""
        folder = self.folder_path.get()
        try:
            # Sorted so the list order does not depend on the file system.
            names = sorted(os.listdir(folder))
        except OSError as e:
            messagebox.showerror("错误", f"无法读取文件夹: {e}")
            return

        # Forget the old rows first so that a late FocusOut from an entry
        # being destroyed finds no path and is ignored by on_entry_change.
        self.item_paths.clear()
        self.pdf_settings.clear()
        for item in self.tree.get_children():
            self.tree.delete(item)
        # Destroy the old overlay widgets; clearing the dict alone would
        # leave them alive (and possibly still placed) on the tree.
        for entry in self.page_entries.values():
            entry.destroy()
        self.page_entries.clear()

        for file in names:
            full_path = os.path.join(folder, file)
            if not file.lower().endswith('.pdf') or not os.path.isfile(full_path):
                continue
            item = self.tree.insert("", tk.END, values=(file, ""))
            self.item_paths[item] = full_path
            self.pdf_settings[full_path] = []
            self.create_entry_for_item(item)

        if self.setting_mode.get() == "individual":
            self.update_page_entries()

    def create_entry_for_item(self, item):
        """Create the (initially unplaced) page-entry widget for a tree row."""
        entry = ttk.Entry(self.tree)
        # Bind the item id as a default argument so each entry keeps its own.
        entry.bind('<Return>', lambda e, i=item: self.on_entry_change(e, i))
        entry.bind('<FocusOut>', lambda e, i=item: self.on_entry_change(e, i))
        self.page_entries[item] = entry

    def update_page_entries(self):
        """Place every page entry over its row's page column and sync its text."""
        # Flush pending geometry work so bbox() reflects rows that were just
        # inserted (assumption: bbox is empty for rows not yet laid out).
        self.tree.update_idletasks()
        for item in self.tree.get_children():
            entry = self.page_entries.get(item)
            if entry is None:
                continue
            bbox = self.tree.bbox(item, "页码设置")
            if not bbox:
                # Row is not visible: make sure no stale entry stays on screen.
                entry.place_forget()
                continue
            x, y, w, h = bbox
            entry.place(x=x, y=y, width=w, height=h)
            entry.delete(0, tk.END)
            # str(): Tk may hand back a purely numeric cell as an int.
            entry.insert(0, str(self.tree.set(item, "页码设置")))

    def on_entry_change(self, event, item):
        """Validate and store the page selection typed into a row's entry."""
        entry = event.widget
        full_path = self.item_paths.get(item)
        if full_path is None or not self.tree.exists(item):
            return  # the row was removed by a reload; nothing to update

        page_string = entry.get()
        try:
            pages = self.parse_page_numbers(page_string)
        except ValueError:
            messagebox.showerror("错误", "页码格式错误")
            entry.focus_set()
            return

        self.pdf_settings[full_path] = pages
        self.tree.set(item, "页码设置", page_string if page_string.strip() else "")

    def parse_page_numbers(self, page_string: str) -> List[int]:
        """Parse a page specification such as ``"1,3-5"``.

        Args:
            page_string: Comma-separated page numbers and inclusive
                ``start-end`` ranges; surrounding whitespace is ignored.

        Returns:
            The selected 1-based page numbers, sorted and without
            duplicates. A blank string yields an empty list.

        Raises:
            ValueError: If a part is not an integer, a range is reversed,
                or a page number is smaller than 1.
        """
        if not page_string.strip():
            return []

        pages = set()
        for part in page_string.split(','):
            part = part.strip()
            if '-' in part:
                start_text, _, end_text = part.partition('-')
                start, end = int(start_text), int(end_text)
                if start > end:
                    raise ValueError(f"reversed page range: {part!r}")
            else:
                start = end = int(part)
            # Page 0 would later index doc[-1], i.e. silently export the
            # last page, so reject anything below the first page here.
            if start < 1:
                raise ValueError(f"page numbers start at 1: {part!r}")
            pages.update(range(start, end + 1))
        return sorted(pages)

    def apply_batch_settings(self):
        """Apply the batch page specification to every listed PDF."""
        page_string = self.batch_pages.get()
        try:
            pages = self.parse_page_numbers(page_string)
        except ValueError:
            messagebox.showerror("错误", "页码格式错误")
            return

        for pdf_path in self.pdf_settings:
            # Give each file its own list so later edits cannot alias.
            self.pdf_settings[pdf_path] = list(pages)
        for item in self.tree.get_children():
            self.tree.set(item, "页码设置", page_string)

    def convert_pdf_to_images(self, pdf_path: str, pages: List[int]):
        """Render the given pages of one PDF to PNG files.

        Images are written to ``<pdf name>_images`` next to the PDF. Page
        numbers outside the document are logged and skipped. Any error is
        logged and reported in a dialog instead of being raised.

        Args:
            pdf_path: Full path of the PDF to convert.
            pages: 1-based page numbers to export.
        """
        # Computed before the try block so the error message can use it.
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        doc = None
        try:
            doc = fitz.open(pdf_path)
            output_dir = os.path.join(os.path.dirname(pdf_path),
                                      f"{base_name}_images")
            os.makedirs(output_dir, exist_ok=True)
            self.log_message(f"开始处理文件: {base_name}")

            page_count = len(doc)
            total_pages = len(pages)
            zoom = fitz.Matrix(self.RENDER_ZOOM, self.RENDER_ZOOM)
            for idx, page_num in enumerate(pages, 1):
                if not 1 <= page_num <= page_count:
                    self.log_message(
                        f"跳过页面 {page_num}: 超出范围 (共 {page_count} 页)")
                    continue
                self.log_message(f"处理页面 {page_num} ({idx}/{total_pages})")
                page = doc[page_num - 1]
                pix = page.get_pixmap(matrix=zoom)
                pix.save(os.path.join(output_dir,
                                      f"{base_name}_page_{page_num}.png"))
                # Drop the references now so the pixmap can be freed
                # before the next page is rendered.
                del pix, page

            self.log_message(f"文件 {base_name} 处理完成")
        except Exception as e:
            # Per-file boundary: report the failure and let other files run.
            error_msg = f"转换文件 {base_name} 时出错: {str(e)}"
            self.log_message(error_msg)
            self._run_on_ui(messagebox.showerror, "错误", error_msg)
        finally:
            if doc is not None:
                doc.close()
            gc.collect()

    def process_queue(self):
        """Worker loop: convert queued jobs until the queue is empty."""
        while True:
            try:
                pdf_path, pages = self.work_queue.get_nowait()
            except Empty:
                return

            try:
                self.convert_pdf_to_images(pdf_path, pages)
            except Exception as e:
                self.log_message(f"处理出错: {str(e)}")
            finally:
                # Always acknowledge the job; otherwise work_queue.join()
                # in the monitor thread would block forever.
                self.work_queue.task_done()
                with self.lock:
                    self.completed_files += 1
                    progress = (self.completed_files / self.total_files) * 100
                self._run_on_ui(self._set_progress, progress)

    def start_conversion(self):
        """Queue every PDF that has pages selected and start the workers."""
        if self.converting:
            messagebox.showwarning("警告", "转换正在进行中,请等待完成")
            return
        if not self.pdf_settings:
            messagebox.showwarning("警告", "没有选择PDF文件")
            return

        # Snapshot the page lists so edits made during the run cannot
        # change a job that is already queued.
        jobs = [(path, list(pages))
                for path, pages in self.pdf_settings.items() if pages]
        if not jobs:
            messagebox.showwarning("警告", "没有设置页码的文件")
            return

        self.completed_files = 0
        self.total_files = len(jobs)
        self.converting = True
        self.progress['value'] = 0
        self.log_message("开始转换处理...")

        for job in jobs:
            self.work_queue.put(job)

        num_threads = min(self.MAX_WORKERS, self.total_files)
        self.log_message(f"启动 {num_threads} 个工作线程")
        threads = [threading.Thread(target=self.process_queue, daemon=True)
                   for _ in range(num_threads)]
        for t in threads:
            t.start()

        monitor_thread = threading.Thread(target=self.monitor_conversion,
                                          args=(threads,), daemon=True)
        monitor_thread.start()

    def monitor_conversion(self, threads):
        """Wait for all jobs and workers, then signal completion to the UI."""
        try:
            self.work_queue.join()
            for t in threads:
                t.join()
            self._run_on_ui(self.conversion_completed)
        except Exception as e:
            self.converting = False
            self.log_message(f"监控线程出错: {str(e)}")

    def conversion_completed(self):
        """Reset the UI and notify the user once a run has finished."""
        self.converting = False
        self.clean_memory()
        self.log_message("所有文件处理完成!")
        self.progress['value'] = 0
        messagebox.showinfo("完成", "所有PDF文件转换完成!")

    def run(self):
        """Enter the Tk main loop."""
        self.root.mainloop()

    def select_folder(self):
        """Ask for a folder and load the PDF files it contains."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_path.set(folder)
            self.load_pdf_files()

    def log_message(self, message):
        """Append a line to the log panel; callable from any thread."""
        self._run_on_ui(self._append_log, message)

    def clean_memory(self):
        """Run a garbage collection pass on demand and log it."""
        gc.collect()
        self.log_message("已执行内存清理")

    def _run_on_ui(self, func, *args):
        """Run ``func(*args)`` on the Tk thread.

        Called directly when already on the Tk thread, otherwise queued
        for the next poll of ``_drain_ui_queue``.
        """
        if threading.current_thread() is self.ui_thread:
            func(*args)
        else:
            self.ui_queue.put((func, args))

    def _drain_ui_queue(self):
        """Execute callbacks queued by background threads, then re-arm."""
        try:
            while True:
                func, args = self.ui_queue.get_nowait()
                func(*args)
        except Empty:
            pass
        finally:
            try:
                self.root.after(self.UI_POLL_MS, self._drain_ui_queue)
            except tk.TclError:
                pass  # the main window no longer exists

    def _append_log(self, message):
        """Insert one line into the log widget and scroll to it."""
        self.log_text.insert(tk.END, f"{message}\n")
        self.log_text.see(tk.END)

    def _set_progress(self, value):
        """Set the progress bar to ``value`` (a percentage)."""
        self.progress['value'] = value

    def show_help(self):
        """Open a modal, read-only window containing the usage instructions."""
        help_text = """
PDF批量转图片工具

这是一个简单易用的PDF转图片工具,主要功能:
1. 可以批量处理多个PDF文件
2. 可以选择需要转换的具体页面
3. 支持两种设置方式:
   - 统一设置:所有PDF使用相同的页码
   - 单独设置:每个PDF单独设置页码

使用方法:
1. 点击"选择文件夹",选择PDF所在文件夹
2. 选择设置方式并输入要转换的页码
   例如:1,2,3 或 1-5 或 1,3-5
3. 点击"开始转换"即可

转换后的图片会自动保存在原PDF所在目录下的"文件名_images"文件夹中。
"""
        help_window = tk.Toplevel(self.root)
        help_window.title("使用帮助")
        help_window.geometry("600x500")

        # Text area with a vertical scrollbar.
        text_frame = ttk.Frame(help_window)
        text_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        help_text_widget = tk.Text(text_frame, wrap=tk.WORD, width=60, height=25)
        scrollbar = ttk.Scrollbar(text_frame, orient=tk.VERTICAL,
                                  command=help_text_widget.yview)
        help_text_widget.configure(yscrollcommand=scrollbar.set)
        help_text_widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        help_text_widget.insert(tk.END, help_text)
        help_text_widget.configure(state='disabled')  # read-only

        ttk.Button(help_window, text="关闭",
                   command=help_window.destroy).pack(pady=10)

        # Make the window modal relative to the main window.
        help_window.transient(self.root)
        help_window.grab_set()

        # Centre the window on the screen.
        help_window.update_idletasks()
        width = help_window.winfo_width()
        height = help_window.winfo_height()
        x = (help_window.winfo_screenwidth() // 2) - (width // 2)
        y = (help_window.winfo_screenheight() // 2) - (height // 2)
        help_window.geometry(f'{width}x{height}+{x}+{y}')


if __name__ == "__main__":
    app = PDFConverter()
    app.run()

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com