PDF 파워업: 로컬 Python 앱으로 PDF 병합, 순서 재배열 및 복호화
발행: (2025년 12월 8일 오후 07:51 GMT+9)
5 min read
원문: Dev.to
Source: Dev.to
데이터와 프라이버시를 지키세요 🔐 — PDF는 내 노트북에서 직접 병합하세요.
여러 PDF 문서를 빠르게 합쳐야 할 때가 많지만, 민감한 파일을 아무 웹사이트에나 업로드하는 것은 보안과 프라이버시 측면에서 우려됩니다. 특히 비밀번호로 보호된 문서라면 정체를 알 수 없는 제3자 서비스에 자격 증명(비밀번호)을 제공해야 하는 상황이 생깁니다. 저는 로컬에서 동작하면서 신뢰할 수 있고 기능이 풍부한 솔루션을 선호하기 때문에, 맞춤형 PDF 병합 애플리케이션을 직접 만들었습니다. 코드는 공개 GitHub 저장소에 올려 두었으며 필요에 맞게 수정할 수 있습니다.
가상 환경 준비
python3 -m venv venv
source venv/bin/activate
pip install --upgrade pip
필요한 패키지 설치
pip install streamlit PyPDF2 watchdog streamlit-sortables
# 또는
pip install -r requirements.txt
요구 사항
streamlit
PyPDF2
watchdog
streamlit-sortables
Streamlit GUI 애플리케이션
# pdf_merger_ST_VPW.py
import streamlit as st
import os
from PyPDF2 import PdfMerger, PdfReader
from io import BytesIO
from streamlit_sortables import sort_items
# Browser-tab title, icon and layout for the app page.
st.set_page_config(page_title="PDF Merger App", page_icon="📄", layout="centered")

# Page heading followed by short usage instructions (rendered as Markdown).
st.title("🔐 PDF Merger App")
st.markdown(
    "\n"
    "Upload PDF files and drag-and-drop to set the merge order.\n"
    "**If a file is password-protected, enter the password below.**\n"
)
def merge_pdfs_streamlit(ordered_file_data, password_map, output_filename="merged_output.pdf"):
    """
    Merge uploaded PDF file objects, in the given order, into one in-memory PDF.

    Encrypted inputs are decrypted with the password found in ``password_map``.
    Files that are not PDFs, cannot be read, or cannot be decrypted are skipped
    and reported through Streamlit messages; the remaining files are merged.

    Args:
        ordered_file_data (list): Dictionaries with a ``'label'`` (file name)
            and a ``'file_object'`` (uploaded file exposing ``type``, ``seek``
            and ``read``), already in merge order.
        password_map (dict): Map of file name -> password for protected files.
        output_filename (str): Not used by this function; kept so existing
            callers that pass it keep working.

    Returns:
        tuple: ``(True, BytesIO)`` positioned at the start of the merged PDF,
        or ``(False, str)`` with an error message.
    """
    if not ordered_file_data:
        return False, "Please upload and order at least one PDF file."

    merger = PdfMerger()
    successful_merges = 0
    try:
        for item in ordered_file_data:
            file = item['file_object']
            file_name = item['label']

            if file.type != "application/pdf":
                st.warning(f"Skipped file '{file_name}' because it is not a valid PDF.")
                continue

            try:
                # Work on a private copy so the uploaded object can be re-read later.
                file.seek(0)
                pdf_reader = PdfReader(BytesIO(file.read()))

                if pdf_reader.is_encrypted:
                    password = password_map.get(file_name)
                    if not password:
                        st.error(f"🔒 **{file_name}** is password-protected. Please provide the password above to include it in the merge. Skipping file.")
                        continue

                    # decrypt() reports failure as 0 (NOT_DECRYPTED); 1 and 2 mean
                    # the user or owner password matched. Treat any non-zero
                    # result as success instead of testing for -1, which is
                    # never returned.
                    if not pdf_reader.decrypt(password):
                        st.error(f"❌ Failed to decrypt **{file_name}**. Password incorrect or file corrupted. Skipping file.")
                        continue
                    st.info(f"🔑 Decrypted **{file_name}** successfully.")

                merger.append(pdf_reader)
                successful_merges += 1
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole merge.
                st.error(f"Skipped file '{file_name}' due to a critical read error: {e}")

        if successful_merges == 0:
            return False, "No valid PDF files were successfully processed."

        output_buffer = BytesIO()
        try:
            merger.write(output_buffer)
        except Exception as e:
            return False, f"Error writing merged file: {e}"
        output_buffer.seek(0)
        return True, output_buffer
    finally:
        # Single cleanup point for every exit path above.
        merger.close()
# --- Streamlit UI Implementation ---
if 'uploaded_files_map' not in st.session_state:
    st.session_state.uploaded_files_map = {}

uploaded_files = st.file_uploader(
    "1. Choose PDF files",
    type="pdf",
    accept_multiple_files=True
)

# Mirror the uploader's current contents on every run, keyed by file name.
# Rebuilding unconditionally also clears the map when the user removes every
# file (the uploader then yields an empty/falsy value), so the rest of the UI
# never keeps showing files that are no longer uploaded, and a re-uploaded
# file with the same name replaces the previously stored object.
st.session_state.uploaded_files_map = {
    uploaded.name: uploaded for uploaded in (uploaded_files or [])
}
if st.session_state.uploaded_files_map:
    # Step 2: one optional password field per uploaded file (alphabetical order).
    st.subheader("2. Password Input (If Needed)")
    password_map = {}
    for name in sorted(st.session_state.uploaded_files_map.keys()):
        password = st.text_input(
            f"Password for **{name}** (Leave blank if not protected)",
            type="password",
            key=f"password_{name}"
        )
        # Only non-empty entries are recorded, so a blank field means "no password".
        if password:
            password_map[name] = password

    # Step 3: let the user drag the file names into the desired merge order.
    st.subheader("3. Drag-and-Drop to Reorder")
    st.info("Drag the items to set the desired merge sequence (top-to-bottom).")
    initial_items = list(st.session_state.uploaded_files_map.keys())
    reordered_names = sort_items(
        items=initial_items,
        key="pdf_list_key"
    )

    # Step 4: choose the output name and run the merge.
    st.subheader("4. Finalize and Merge")
    default_name = "merged_documents.pdf"
    if reordered_names:
        # Derive the suggestion from the first file. Strip only a trailing
        # ".pdf" extension (any letter case) rather than every ".pdf"
        # substring in the name.
        first_name = reordered_names[0]
        stem, ext = os.path.splitext(first_name)
        base_name = stem if ext.lower() == '.pdf' else first_name
        default_name = f"{base_name}_merged.pdf"

    output_name = st.text_input(
        "Enter output file name (e.g., final_report.pdf)",
        value=default_name,
        help="The output file will **not** be password-protected."
    )

    if st.button("✨ Execute Merge"):
        if not reordered_names:
            st.warning("Please upload files or ensure the list is not empty.")
        elif not output_name.lower().endswith('.pdf'):
            st.error("The output filename must end with **.pdf**")
        else:
            # Pair each name, in the user's order, with its uploaded file object.
            ordered_file_data = [
                {
                    'label': name,
                    'file_object': st.session_state.uploaded_files_map[name]
                }
                for name in reordered_names
            ]

            with st.spinner('Processing and Merging your PDF files...'):
                success, result = merge_pdfs_streamlit(ordered_file_data, password_map, output_name)

            if success:
                # NOTE: the count below is the number of files submitted;
                # merge_pdfs_streamlit may have skipped some of them.
                st.success(f"✅ Success! Merged {len(ordered_file_data)} PDF files. The final file is decrypted.")
                st.download_button(
                    label="⬇️ Download Merged PDF",
                    data=result,
                    file_name=output_name,
                    mime="application/pdf"
                )
                st.balloons()
            else:
                st.error(f"❌ Merge failed: {result}")
else:
    st.info("Upload your PDF files to begin the merging process.")
기본 콘솔 애플리케이션 (폴더 기반 병합)
# pdf_merger_2.py
import os
from PyPDF2 import PdfMerger, PdfReader
def merge_pdfs_in_directory(input_folder, output_folder, output_filename="merged_output.pdf"):
    """
    Recursively find all PDF files in input_folder and merge them into a
    single PDF file in output_folder.

    Directories and files are visited in sorted order. Encrypted files prompt
    for a password on the terminal; files that cannot be decrypted or read are
    skipped with a printed message. Nothing is written when no file could be
    merged.

    Args:
        input_folder (str): The path to the folder containing PDF files.
        output_folder (str): The path where the merged PDF will be saved.
        output_filename (str): Name of the resulting merged PDF.
    """
    import getpass  # local import: only this function needs it

    output_path = os.path.join(output_folder, output_filename)
    output_abs = os.path.abspath(output_path)

    # Collect candidates first so an empty folder never creates an output file.
    pdf_paths = []
    for root, dirs, files in os.walk(input_folder):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for file in sorted(files):
            if not file.lower().endswith('.pdf'):
                continue
            pdf_path = os.path.join(root, file)
            # Do not merge a previous run's output back into itself.
            if os.path.abspath(pdf_path) == output_abs:
                continue
            pdf_paths.append(pdf_path)

    if not pdf_paths:
        print(f"No PDF files found in: {input_folder}")
        return

    merger = PdfMerger()
    added = 0
    try:
        for pdf_path in pdf_paths:
            file = os.path.basename(pdf_path)
            try:
                with open(pdf_path, 'rb') as f:
                    reader = PdfReader(f)
                    if reader.is_encrypted:
                        # getpass keeps the password from being echoed.
                        password = getpass.getpass(f"Enter password for '{file}': ")
                        # decrypt() returns 0 on failure; 1 (user) and 2 (owner)
                        # both mean the file is now readable.
                        if not reader.decrypt(password):
                            print(f"Skipping encrypted file '{file}' (wrong password).")
                            continue
                    merger.append(reader)
                    added += 1
                    print(f"Added '{file}'")
            except Exception as e:
                # Best effort: report the file and keep merging the others.
                print(f"Error processing '{file}': {e}")

        if added == 0:
            print("No PDF files could be merged; nothing was written.")
            return

        os.makedirs(output_folder, exist_ok=True)
        with open(output_path, 'wb') as out_f:
            merger.write(out_f)
    finally:
        # Release the merger even if reading or writing failed.
        merger.close()

    print(f"Merged PDF saved to: {output_path}")
# Example usage:
# merge_pdfs_in_directory('path/to/input_folder', 'path/to/output_folder')