-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPDFTools.py
260 lines (203 loc) · 9.21 KB
/
PDFTools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import os
import sys
import time
import psutil
from tkinter import (N, BooleanVar, Button, Canvas, Checkbutton, Label, PhotoImage, Tk, Toplevel, messagebox, filedialog)
import comtypes.client
from pypdf import PdfReader, PdfWriter
import threading
# Word constants (WdSaveFormat / WdSaveOptions) used by docx_to_pdf.
_WD_FORMAT_PDF = 17
_WD_DO_NOT_SAVE_CHANGES = 0


def docx_to_pdf(docx_filename, pdf_filename, disable_track_changes_var):
    """Convert one Word document to PDF through Word's COM automation.

    Args:
        docx_filename: Path of the Word document to read.
        pdf_filename: Path of the PDF file to write.
        disable_track_changes_var: Object whose ``get()`` returns a truthy
            value when comments should be deleted and all revisions
            accepted before the PDF is written.

    Returns:
        1 if the PDF was written, 0 if the document could not be opened or
        the PDF could not be saved (an error dialog is shown in both cases).
    """
    # Create a COM object for Word.
    word = comtypes.client.CreateObject('Word.Application')
    doc = None
    try:
        docx_filename = os.path.abspath(docx_filename)
        try:
            doc = word.Documents.Open(f'"{docx_filename}"')
        except comtypes.COMError:
            messagebox.showerror(
                title="Fehler",
                message=f"Die Datei {docx_filename} kann nicht geöffnet werden.\nPDF gerade geöffnet?",
            )
            return 0

        if disable_track_changes_var.get():
            # Called directly: the previous Thread(target=delete_comments(doc))
            # already ran the function in this thread and then started a
            # thread with no target, so nothing was ever offloaded.
            delete_comments(doc)

        pdf_filename = os.path.abspath(pdf_filename)
        try:
            doc.SaveAs(pdf_filename, FileFormat=_WD_FORMAT_PDF)
        except comtypes.COMError:
            messagebox.showerror(
                title="Fehler",
                message=f"Die Datei {pdf_filename} kann nicht angelegt werden.",
            )
            return 0
        return 1
    finally:
        # Always shut Word down, even on unexpected errors, so no hidden
        # WINWORD.EXE instance is left running.
        try:
            if doc is not None:
                # The document may have been modified in memory (comments
                # removed, revisions accepted); discard those changes so the
                # source .docx is not touched and no save prompt can appear.
                doc.Close(SaveChanges=_WD_DO_NOT_SAVE_CHANGES)
        finally:
            word.Quit()
def delete_comments(doc):
    """Remove all comments from *doc*, stop change tracking and accept all revisions.

    Args:
        doc: A Word document object exposing ``Comments`` (with ``Count`` and
            ``Item``), ``TrackRevisions`` and ``AcceptAllRevisions``.
    """
    comments = doc.Comments
    # Delete by index from the back: removing entries from a live collection
    # while enumerating it forwards can skip items.
    for index in range(comments.Count, 0, -1):
        comments.Item(index).Delete()
    # Stop recording further changes.
    doc.TrackRevisions = False
    # Accept all revisions.
    doc.AcceptAllRevisions()
def select_docx_files(convert_button, enable_track_changes_cb):
    """Let the user pick .docx files and convert each of them to a PDF.

    The PDF is written next to its source document with the extension
    replaced by ``.pdf``.

    Args:
        convert_button: Button that triggered the conversion; it is disabled
            and used as a progress display while the conversion runs.
        enable_track_changes_cb: Object with a ``get()`` method, passed on
            unchanged to ``docx_to_pdf`` (the caller in this file passes the
            checkbox's ``BooleanVar``).
    """
    # Counter for the user info message shown at the end.
    pdfcount = 0
    # Disable the button to guard against unintentional repeated clicks.
    convert_button.config(state="disabled", text="... einen Moment bitte ...")
    try:
        # Ask the user to close all Word windows first.
        messagebox.showwarning(title = "Word-Fenster schließen!",
                               message = "Schließen Sie alle Word-Fenster und klicken Sie dann auf OK.\n\nErläuterung:\nUm fehlerfrei PDF zu erzeugen, dürfen keine anderen Word Instanzen parallel laufen.")
        # Terminate remaining Word processes. The thread is joined right
        # away, so this step is effectively synchronous.
        kill_thread = threading.Thread(target=kill_all_word)
        kill_thread.start()
        kill_thread.join()
        # Open a file selection dialog and get the selected files.
        docx_filenames = filedialog.askopenfilenames(title='Word-Dateien zur Erzeugung von PDF auswählen', filetypes=[('Word Dokumente', '*.docx')])
        for docx_filename in docx_filenames:
            # Same path, with the extension swapped for .pdf.
            base, _ = os.path.splitext(docx_filename)
            pdf_filename = base + '.pdf'
            # docx_to_pdf returns 1 on success and 0 on failure.
            pdfcount += docx_to_pdf(docx_filename, pdf_filename, enable_track_changes_cb)
            convert_button.config(state="disabled", text="Erzeugte PDF: " + str(pdfcount))
            # Process pending events so the progress text is redrawn; going
            # through the button avoids depending on the module-level window.
            convert_button.update()
        if pdfcount > 0:
            show_temp_message('Erledigt', 'Es wurde(n)\n' + str(pdfcount) + ' PDF erzeugt.')
    finally:
        # Re-enable the buttons even if a conversion failed unexpectedly;
        # otherwise the button would stay disabled until the app restarts.
        revert_button_text()
def _write_pdf_without_metadata(source_path, target_path):
    """Copy all pages of *source_path* into *target_path* with blanked metadata fields."""
    reader = PdfReader(source_path)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    writer.add_metadata(
        {
            "/Creator": "",
            "/Producer": "",
            "/Author": "",
            "/Title": "",
            "/Subject": "",
            "/Keywords": "",
            "/CreationDate": "",
            "/ModDate": "",
        }
    )
    with open(target_path, 'wb') as out:
        writer.write(out)


def remove_metadata(meta_button):
    """Let the user pick PDF files and rewrite each one with blanked metadata.

    Each selected file is first renamed to ``<name>_todo<ext>``, a cleaned
    copy is written under the original name, and the renamed file is then
    deleted. Files that cannot be renamed are skipped with an error dialog.
    If rewriting fails, the original file is moved back to its name.

    Args:
        meta_button: Button that triggered the action; it is disabled and
            relabelled while the files are processed.
    """
    pdfcount = 0
    meta_button.config(state="disabled", text="Entferne gerade Metadaten...")
    try:
        files = filedialog.askopenfilenames(title='PDF auswählen', filetypes=[('PDF Dokumente', '*.pdf')])
        for pdf_path in files:
            name, extension = os.path.splitext(pdf_path)
            # Temporary name for the original while the cleaned copy is written.
            temp_name = name + "_todo" + extension
            try:
                os.rename(pdf_path, temp_name)
            except PermissionError:
                # The file is in use by another program: skip it.
                messagebox.showerror('Fehler', 'Die Datei "{}" wird von einem anderen Programm verwendet und wird daher übersprungen.'.format(pdf_path))
                continue
            except FileExistsError:
                messagebox.showerror('Fehler', 'Die temporäre Datei "{}" gibt es bereits. Datei übersprungen.\n\nBitte löschen Sie diese Datei und starten das PDF Erzeugen neu.'.format(temp_name))
                continue

            try:
                _write_pdf_without_metadata(temp_name, pdf_path)
            except Exception as exc:
                # Per-file boundary: put the untouched original back under its
                # own name (overwriting any partial output) instead of leaving
                # the user's file renamed to *_todo.
                os.replace(temp_name, pdf_path)
                messagebox.showerror('Fehler', 'Die Datei "{}" konnte nicht verarbeitet werden und bleibt unverändert.\n\n{}'.format(pdf_path, exc))
                continue

            pdfcount += 1
            os.remove(temp_name)
    finally:
        # Re-enable the buttons even when something unexpected went wrong.
        revert_button_text()
    if pdfcount > 0:
        show_temp_message("erledigt...", "Die Metadaten\nvon " + str(pdfcount) + " PDF\nwurden entfernt.")
def show_temp_message(title, message, seconds=5):
    """Show *message* in a borderless popup that closes itself.

    Args:
        title: Window title assigned to the popup.
        message: Text displayed in the popup.
        seconds: Time until the popup is destroyed; may be fractional.
    """
    # Named "popup" so the module-level main window "root" is not shadowed.
    popup = Toplevel()
    # Suppress the window manager decorations (title bar, border).
    popup.overrideredirect(True)
    popup.title(title)
    # Position relative to the screen centre; the offsets are presumably
    # about half of the popup's typical size - TODO confirm.
    x = (popup.winfo_screenwidth() // 2) - 235
    y = (popup.winfo_screenheight() // 2) - 95
    popup.geometry(f"+{x}+{y}")
    Label(popup, text=message, font=("Helvetica", 50)).pack()
    # Start the close timer once the event loop is idle. The delay is passed
    # as whole milliseconds so fractional seconds are accepted as well.
    popup.after_idle(lambda: popup.after(int(seconds * 1000), popup.destroy))
def revert_button_text():
    """Re-enable both action buttons and restore their original captions.

    Operates on the module-level ``meta_button`` and ``convert_button``.
    """
    # "normal" is the enabled resting state; "active" is the appearance a
    # button takes while the pointer is over it.
    meta_button.config(state="normal", text="Metadaten aus PDF entfernen")
    # Enable the button again so another batch of PDF files can be created.
    convert_button.config(state="normal", text="PDF aus Docx erzeugen")
def kill_all_word():
    """Terminate every running process whose name is ``winword.exe``.

    The comparison is case-insensitive. Processes that vanish or cannot be
    accessed while scanning are skipped (best effort).
    """
    for proc in psutil.process_iter():
        try:
            # name() raises AccessDenied instead of handing back None, so a
            # process with an unreadable name can no longer abort the scan
            # with an AttributeError on .lower().
            process_name = proc.name()
            if process_name and process_name.lower() == 'winword.exe':
                # Terminate through the object we already hold rather than
                # looking the PID up a second time.
                proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Deliberately ignored: the process is gone or not ours to touch.
            continue
def resource_path(relative_path):
    """Return the path of a bundled resource, for dev runs and PyInstaller builds.

    When ``sys._MEIPASS`` exists (PyInstaller stores the path of its temp
    folder there), *relative_path* is joined onto it; otherwise it is joined
    onto the absolute path of the current working directory.
    """
    if hasattr(sys, "_MEIPASS"):
        bundle_root = sys._MEIPASS
    else:
        bundle_root = os.path.abspath(".")
    return os.path.join(bundle_root, relative_path)
# --- Build the main window and start the application -----------------------
# The names root, convert_button and meta_button defined here are module
# globals that revert_button_text() and select_docx_files() refer to.
root = Tk()
root.iconbitmap(resource_path("PDFTools.ico"))
# Set the window title
root.title("PDF-Tools v1.5 (buc @ hems.de)")
# No explicit window size is set (the call below is commented out).
#root.geometry("430x250")
# Disable resizing in both directions.
root.resizable(0, 0)
# Header image in the top row, spanning both columns.
pimage = PhotoImage(file=resource_path("hla.png"))
label1 = Label(image=pimage)
# Keep a reference to the image on the label as well.
label1.image = pimage
label1.grid(row=0, column=0, sticky=N,columnspan=2)
# Button that starts the docx -> PDF conversion. The lambda refers to
# enable_track_changes_var, which is only defined further down; the name is
# looked up when the button is clicked, not when the lambda is created.
convert_button = Button(root, text ="PDF aus Docx erzeugen", width = 20, command=lambda: select_docx_files(convert_button, enable_track_changes_var), font=("Helvetica", 14))
convert_button.grid(row=1, column=1, padx=5, pady=5)
# Checkbox (backed by a BooleanVar) selecting whether comments are deleted,
# change tracking is stopped and all changes are accepted before conversion.
enable_track_changes_var = BooleanVar()
enable_track_changes_cb = Checkbutton(root, text="Evtl. Kommentare löschen,\nNachverfolgung beenden\nund Änderungen annehmen", variable=enable_track_changes_var)
enable_track_changes_cb.grid(row=1, column=0, padx=5, pady=5)
# Dashed horizontal line separating the two tools.
canvas = Canvas(root, height=1)
canvas.create_line(2, 2, 500, 2, dash=(4,2))
canvas.grid(row=2, column=0, columnspan=2, padx=5, pady=5)
# Button that starts removing the metadata from PDFs.
meta_button = Button(root, text="Metadaten aus PDF entfernen", command=lambda: remove_metadata(meta_button), font=("Helvetica", 14))
meta_button.grid(row=3, column=0, columnspan=2, padx=5, pady=10)
# Run the Tkinter event loop (blocks until the window is closed).
root.mainloop()