PDF 파워업: 로컬 Python 앱으로 PDF 병합, 순서 재배열 및 복호화

발행: (2025년 12월 8일 오후 07:51 GMT+9)
5 min read
원문: Dev.to

Source: Dev.to

데이터와 프라이버시를 보호하세요 🔐 — PDF 병합은 내 노트북에서 직접 처리하세요.

여러 PDF 문서를 빠르게 합쳐야 할 때가 많지만, 민감한 파일을 출처를 알 수 없는 웹사이트에 업로드하는 것은 보안과 프라이버시 측면에서 우려됩니다. 특히 비밀번호로 보호된 문서라면, 알 수 없는 제3자 서비스에 문서 비밀번호와 같은 자격 증명까지 제공해야 하는 상황이 생깁니다. 저는 로컬에서 동작하는, 신뢰할 수 있고 기능이 풍부한 솔루션을 선호하기 때문에 맞춤형 PDF 병합 애플리케이션을 직접 만들었습니다. 코드는 GitHub 공개 저장소에 올라와 있으며 필요에 맞게 수정할 수 있습니다.

가상 환경 준비

python3 -m venv venv
source venv/bin/activate

pip install --upgrade pip

필요한 패키지 설치

pip install streamlit PyPDF2 watchdog streamlit-sortables
# 또는
pip install -r requirements.txt

요구 사항

  • streamlit
  • PyPDF2
  • watchdog
  • streamlit-sortables

Streamlit GUI 애플리케이션

# pdf_merger_ST_VPW.py
import streamlit as st
import os
from PyPDF2 import PdfMerger, PdfReader
from io import BytesIO
from streamlit_sortables import sort_items 

# Browser-tab title/icon and page layout for the app.
_PAGE_SETTINGS = {
    "page_title": "PDF Merger App",
    "page_icon": "📄",
    "layout": "centered",
}
st.set_page_config(**_PAGE_SETTINGS)

# Short usage instructions rendered as Markdown under the page title.
_INTRO_MARKDOWN = """
    Upload PDF files and drag-and-drop to set the merge order.
    **If a file is password-protected, enter the password below.**
"""

st.title("🔐 PDF Merger App")
st.markdown(_INTRO_MARKDOWN)

def merge_pdfs_streamlit(ordered_file_data, password_map, output_filename="merged_output.pdf"):
    """
    Merge uploaded PDF file objects, in the given order, into one in-memory PDF.

    Encrypted inputs are decrypted with the password found in ``password_map``.
    Files that are not PDFs, that are encrypted without a usable password, or
    that fail to parse are reported through Streamlit messages and skipped;
    the remaining files are still merged.

    Args:
        ordered_file_data (list): Dictionaries with the keys ``'label'`` (file
            name) and ``'file_object'`` (uploaded file exposing ``type``,
            ``seek`` and ``read``), already in merge order.
        password_map (dict): Map of file name -> password for protected files.
            ``None`` is treated as an empty map.
        output_filename (str): Accepted for interface compatibility; the
            function does not use it (the merged PDF is returned in memory).

    Returns:
        tuple: ``(True, BytesIO)`` holding the merged PDF on success, or
        ``(False, str)`` with an error message when nothing could be merged
        or writing failed.
    """
    if not ordered_file_data:
        return False, "Please upload and order at least one PDF file."

    passwords = password_map or {}
    merger = PdfMerger()
    successful_merges = 0

    for item in ordered_file_data:
        file = item['file_object']
        file_name = item['label']

        if file.type != "application/pdf":
            st.warning(f"Skipped file '{file_name}' because it is not a valid PDF.")
            continue

        try:
            # Rewind first: the upload may already have been read on an
            # earlier pass. Work on a private copy of the bytes.
            file.seek(0)
            file_buffer = BytesIO(file.read())
            pdf_reader = PdfReader(file_buffer)

            if pdf_reader.is_encrypted:
                password = passwords.get(file_name)

                if not password:
                    st.error(f"🔒 **{file_name}** is password-protected. Please provide the password above to include it in the merge. Skipping file.")
                    continue

                # decrypt() signals failure with a falsy result (0) and
                # success with 1 (user password) or 2 (owner password); it
                # never returns -1, so test truthiness rather than a code.
                if not pdf_reader.decrypt(password):
                    st.error(f"❌ Failed to decrypt **{file_name}**. Password incorrect or file corrupted. Skipping file.")
                    continue

                st.info(f"🔑 Decrypted **{file_name}** successfully.")

            merger.append(pdf_reader)
            successful_merges += 1

        except Exception as e:
            # Best-effort merge: one unreadable file must not abort the rest.
            st.error(f"Skipped file '{file_name}' due to a critical read error: {e}")

    if successful_merges == 0:
        merger.close()
        return False, "No valid PDF files were successfully processed."

    output_buffer = BytesIO()
    try:
        merger.write(output_buffer)
        # Rewind so the caller can read the merged PDF from the start.
        output_buffer.seek(0)
        return True, output_buffer
    except Exception as e:
        return False, f"Error writing merged file: {e}"
    finally:
        merger.close()

# --- Streamlit UI Implementation ---

# Session state holds the file-name -> uploaded-file mapping that the later
# steps use to resolve the names returned by the sortable widget.
if 'uploaded_files_map' not in st.session_state:
    st.session_state.uploaded_files_map = {}

uploaded_files = st.file_uploader(
    "1. Choose PDF files",
    type="pdf",
    accept_multiple_files=True
)

if uploaded_files:
    # Rebuild the mapping on every run. Refreshing it only when the set of
    # names changes would keep a stale file object when a file is replaced by
    # another upload that has the same name.
    st.session_state.uploaded_files_map = {f.name: f for f in uploaded_files}

    if st.session_state.uploaded_files_map:
        st.subheader("2. Password Input (If Needed)")
        password_map = {}

        # One password field per file; only non-empty entries are recorded.
        for name in sorted(st.session_state.uploaded_files_map):
            password = st.text_input(
                f"Password for **{name}** (Leave blank if not protected)",
                type="password",
                key=f"password_{name}"
            )
            if password:
                password_map[name] = password

        st.subheader("3. Drag-and-Drop to Reorder")
        st.info("Drag the items to set the desired merge sequence (top-to-bottom).")

        initial_items = list(st.session_state.uploaded_files_map)

        reordered_names = sort_items(
            items=initial_items,
            key="pdf_list_key"
        )

        st.subheader("4. Finalize and Merge")

        # Suggest an output name derived from the first file in merge order.
        default_name = "merged_documents.pdf"
        if reordered_names:
            # Strip only a trailing ".pdf" (any letter case); str.replace would
            # also remove ".pdf" occurrences in the middle of the name.
            stem, ext = os.path.splitext(reordered_names[0])
            base_name = stem if ext.lower() == '.pdf' else reordered_names[0]
            default_name = f"{base_name}_merged.pdf"

        output_name = st.text_input(
            "Enter output file name (e.g., final_report.pdf)",
            value=default_name,
            help="The output file will **not** be password-protected."
        )

        if st.button("✨ Execute Merge"):
            if not reordered_names:
                st.warning("Please upload files or ensure the list is not empty.")
            elif not output_name.lower().endswith('.pdf'):
                st.error("The output filename must end with **.pdf**")
            else:
                ordered_file_data = [
                    {
                        'label': name,
                        'file_object': st.session_state.uploaded_files_map[name]
                    }
                    for name in reordered_names
                ]

                with st.spinner('Processing and Merging your PDF files...'):
                    success, result = merge_pdfs_streamlit(ordered_file_data, password_map, output_name)

                if success:
                    # NOTE: the count is the number of files submitted; files
                    # skipped during the merge are reported individually by
                    # merge_pdfs_streamlit.
                    st.success(f"✅ Success! Merged {len(ordered_file_data)} PDF files. The final file is decrypted.")
                    st.download_button(
                        label="⬇️ Download Merged PDF",
                        data=result,
                        file_name=output_name,
                        mime="application/pdf"
                    )
                    st.balloons()
                else:
                    st.error(f"❌ Merge failed: {result}")

else:
    st.info("Upload your PDF files to begin the merging process.")

기본 콘솔 애플리케이션 (폴더 기반 병합)

# pdf_merger_2.py
import os
from PyPDF2 import PdfMerger, PdfReader

def merge_pdfs_in_directory(input_folder, output_folder, output_filename="merged_output.pdf"):
    """
    Recursively find all PDF files in ``input_folder`` and merge them into a
    single PDF file in ``output_folder``.

    Directories and file names are visited in sorted order so the page order
    of the result is reproducible. For an encrypted file the password is
    requested on the console; a file whose password is rejected, or that
    cannot be read, is reported and skipped. The output file itself is never
    used as an input, so re-running with an output folder inside the input
    folder does not merge the previous result into the new one. If no PDF
    could be added, nothing is written.

    Args:
        input_folder (str): The path to the folder containing PDF files.
        output_folder (str): The path where the merged PDF will be saved.
            Created if it does not exist.
        output_filename (str): Name of the resulting merged PDF.

    Returns:
        None. Progress and the output location are printed.
    """
    output_path = os.path.join(output_folder, output_filename)
    output_abs = os.path.abspath(output_path)

    # Collect the inputs first so an empty folder never produces an output.
    pdf_paths = []
    for root, dirs, files in os.walk(input_folder):
        # Sorting in place makes os.walk descend into sub-folders in a
        # deterministic order.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith('.pdf'):
                continue
            pdf_path = os.path.join(root, file)
            if os.path.abspath(pdf_path) == output_abs:
                # A previous merge result must not be merged into itself.
                continue
            pdf_paths.append(pdf_path)

    if not pdf_paths:
        print(f"No PDF files found in '{input_folder}'. Nothing to merge.")
        return

    merger = PdfMerger()
    try:
        added = 0
        for pdf_path in pdf_paths:
            file = os.path.basename(pdf_path)
            try:
                with open(pdf_path, 'rb') as f:
                    reader = PdfReader(f)
                    if reader.is_encrypted:
                        # Prompt for password if needed (simple example)
                        password = input(f"Enter password for '{file}': ")
                        # decrypt() is falsy on failure and returns 1 (user
                        # password) or 2 (owner password) on success, so a
                        # "!= 1" check would reject a valid owner password.
                        if not reader.decrypt(password):
                            print(f"Skipping encrypted file '{file}' (wrong password).")
                            continue
                    merger.append(reader)
                    added += 1
                    print(f"Added '{file}'")
            except Exception as e:
                # Best-effort: one broken file must not abort the whole merge.
                print(f"Error processing '{file}': {e}")

        if added == 0:
            print("No PDF files could be added. Nothing to merge.")
            return

        os.makedirs(output_folder, exist_ok=True)
        with open(output_path, 'wb') as out_f:
            merger.write(out_f)
        print(f"Merged PDF saved to: {output_path}")
    finally:
        # Release the merger even if reading or writing failed.
        merger.close()

# Example usage:
# merge_pdfs_in_directory('path/to/input_folder', 'path/to/output_folder')
Back to Blog

관련 글

더 보기 »

커널 Rust 실험의 끝

기사 URL: https://lwn.net/Articles/1049831/ 댓글 URL: https://news.ycombinator.com/item?id=46213585 점수: 66 댓글: 22