๐ก๏ธ Python์ผ๋ก ์ค๋งํธ Excel ๋ฐ์ดํฐ ํด๋ฆฌ๋ ๊ตฌ์ถ (๋จ๊ณ๋ณ)
Iโm happy to translate the article for you, but I need the text youโd like translated. Could you please paste the articleโs content (excluding the source line you already provided) here? Once I have the text, Iโll translate it into Korean while preserving the original formatting, markdown, and code blocks.
๊ตฌ์ถํ ๋ด์ฉ
- Excel ์ ๋ฆฌ๋ฅผ ์ํ ๋ฐ์คํฌํฑ GUI ์ฑ
- ์๋ ๊ฒฐ์ธก๊ฐ ์ฒ๋ฆฌ
- ์ค๋ณต ํ์ง
- ํด๋ฆฌ์คํฑ โ๋ฐ์ดํฐ ๊ฑด๊ฐโ ์ ์ ๋งค๊ธฐ๊ธฐ
- ๊ฒฐ๊ณผ๋ฅผ Excel, PDF, JSON, ๋ฐ TXT ํ์์ผ๋ก ๋ด๋ณด๋ด๊ธฐ
GitHub ์ ์ฅ์ (์ ์ฒด ์คํฌ๋ฆฝํธ):
๐
๐งฐ ์ ์ ์กฐ๊ฑด
- Python 3.9+
- ๊ธฐ๋ณธ Python ์ง์
ํ์ํ ํจํค์ง ์ค์น
pip install pandas numpy openpyxl ttkbootstrap reportlab
๐ ํ๋ก์ ํธ ๊ตฌ์กฐ
SmartExcelGuardian/
โโโ main.py
โโโ logo.ico
โโโ excelguardian.log
1๏ธโฃ Import Required Libraries
# Core
import os
import sys
import threading
import json
import tkinter as tk
from tkinter import filedialog
# UI
import ttkbootstrap as tb
from ttkbootstrap.constants import *
# Misc
from datetime import datetime
Why these modules?
| Module | Purpose |
|---|---|
| tkinter | GUI 기반 |
| ttkbootstrap | 현대적인 다크 UI 테마 |
| threading | 정리 작업 중 UI 응답성 유지 |
| pandas | 데이터 정리 |
| numpy | 수치 연산 |
| openpyxl | Excel 내보내기 및 수식 지원 |
| re | 열 이름 정규화 |
| reportlab | 전문 PDF 보고서 생성 |
# Data & Excel
import pandas as pd
import numpy as np
import re
from openpyxl import Workbook
from openpyxl.styles import PatternFill, Font
from openpyxl.utils.dataframe import dataframe_to_rows
# PDF Export
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.lib.colors import red, orange, green, black
2๏ธโฃ ์ ์ญ ์ํ ๋ฐ ๋ก๊น
# Module-level state shared by the GUI code and the cleanup/export code.
stop_event = threading.Event() # Allows canceling cleanup
cleanup_results = {} # Shared export data
# The log file is placed in the current working directory (os.getcwd()),
# which is not necessarily the directory containing the script.
log_file = os.path.join(os.getcwd(), "excelguardian.log")
3๏ธโฃ ์ ํธ๋ฆฌํฐ ํฌํผ ํจ์
๋ฆฌ์์ค ๋ก๋ (ํจํค์ง๋ ์ฑ์ฉ)
def resource_path(file_name):
    """Return the absolute path of a resource shipped with the application.

    When the program runs from a PyInstaller bundle, ``sys._MEIPASS`` holds
    the directory the bundle was unpacked to and is used as the base
    directory. Otherwise the directory containing this source file is used.

    Args:
        file_name: Resource file name (or relative path) to resolve.

    Returns:
        ``file_name`` joined onto the selected base directory.
    """
    base_path = getattr(
        sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
    )
    return os.path.join(base_path, file_name)
์ปฌ๋ผโ์ด๋ฆ ํด๋ฆฌ๋
def clean_column_name(name):
    """Normalize a column label into a lower-case, underscore-separated name.

    Steps: convert to ``str``, trim, lower-case, drop every character that
    is neither a word character nor whitespace, trim again, then collapse
    each whitespace run into a single underscore.

    Args:
        name: Original column label. Non-string labels (for example integer
            headers) are converted with ``str()`` first.

    Returns:
        The normalized name, e.g. ``"Total Sales ($)"`` -> ``"total_sales"``.
    """
    text = str(name).strip().lower()
    text = re.sub(r"[^\w\s]", "", text)  # Remove punctuation / symbols
    # Removing punctuation can expose new leading or trailing whitespace
    # ("total sales ($)" -> "total sales "); trim it so the result does not
    # start or end with a stray underscore.
    return re.sub(r"\s+", "_", text.strip())
์์
| ์๋ณธ | ์ ๋ฆฌ๋ |
|---|---|
| Total Sales ($) | total_sales |
NumPy โ JSON ๋ณํ๊ธฐ
def convert_numpy(obj):
    """Convert NumPy values to built-in types for ``json.dump(default=...)``.

    Args:
        obj: A value the JSON encoder could not serialize on its own.

    Returns:
        ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
        for NumPy floats, and a (nested) ``list`` for arrays.

    Raises:
        TypeError: If ``obj`` is not one of the supported NumPy types, which
            is how a ``default`` hook signals an unserializable value.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj)} is not JSON serialisable")
4๏ธโฃ ๋ฉ์ธ ์๋์ฐ ๋ง๋ค๊ธฐ
# Create the application's top-level ttkbootstrap window with the "darkly"
# theme, then set its title-bar text and its initial geometry string.
app = tb.Window(themename="darkly")
app.title("SmartExcelGuardian v1.1.0")
app.geometry("1100x650")
์ ttkbootstrap์ธ๊ฐ?
- ๋ฐ๋ก ์ฌ์ฉํ ์ ์๋ ํ๋์ ์ธ ์คํ์ผ๋ง
- ๋ด์ฅ ๋คํฌ ๋ชจ๋ ์ง์
- ๋ฐ์ํ ๋ ์ด์์ ๋์ฐ๋ฏธ
5๏ธโฃ ์ ๋ชฉ ์น์
# Main heading: large bold application name, packed at the top of `app`.
tb.Label(app,
text="SmartExcelGuardian",
font=("Segoe UI", 22, "bold")).pack(pady=(10, 2))
# Sub-heading: smaller italic tagline in a grey foreground colour.
tb.Label(app,
text="Professional Excel Data Guardian Tool",
font=("Segoe UI", 10, "italic"),
foreground="#9ca3af").pack(pady=(0, 8))
์ฑ ํค๋๋ฅผ ์์ฑํฉ๋๋ค.
6๏ธโฃ Excel ํ์ผ ์ ํ๊ธฐ
# Holds the path of the workbook chosen by the user; bound to the entry below.
file_path = tk.StringVar()


def _browse_excel_file():
    """Open a file picker and store the chosen workbook path in ``file_path``.

    ``askopenfilename`` returns an empty string when the dialog is cancelled;
    in that case the previously selected path is kept instead of being
    overwritten with an empty value.
    """
    chosen = filedialog.askopenfilename(
        filetypes=[("Excel Files", "*.xlsx *.xls")]
    )
    if chosen:
        file_path.set(chosen)


# Row container: `row1` must already exist as a Frame (it is not created in
# this snippet).
tb.Entry(row1, textvariable=file_path, width=60).pack(side="left", padx=6)
tb.Button(
    row1,
    text="📂 Excel File",
    command=_browse_excel_file,
).pack(side="left")
์ฌ์ฉ์๊ฐ Excel ์ํฌ๋ถ์ ์ ํํ ์ ์๋๋ก ํฉ๋๋ค.
7๏ธโฃ ์ ๋ฆฌ ์ ์ด ๋ฒํผ
# Buttons that start and cancel a cleanup run. `row2` must already exist as a
# container widget. In this snippet the buttons are neither placed with a
# geometry manager nor given a `command`; that wiring has to happen elsewhere.
start_btn = tb.Button(
    row2,
    text="🛡 CLEAN DATA",
    bootstyle="success",
)
# Created disabled; presumably enabled while a cleanup is running -- TODO confirm.
stop_btn = tb.Button(
    row2,
    text="🛑 STOP",
    bootstyle="danger-outline",
    state="disabled",
)
- CLEAN DATA โ ๋ฐฑ๊ทธ๋ผ์ด๋ ์ค๋ ๋๋ฅผ ์์ํ์ฌ ์ ๋ฆฌ ์์ง์ ์คํํฉ๋๋ค.
- STOP โ ์ฒ๋ฆฌ๋ฅผ ์์ ํ๊ฒ ์ค๋จํฉ๋๋ค.
8๏ธโฃ Results Table (Treeview)
# Identifiers of the result-table columns, in display order.
cols = (
"column", "original_type", "suggested_type",
"cleaned_type", "missing_values",
"duplicates_detected", "heuristic_score",
"rename_suggestion"
)
# show="headings" displays only the columns listed in `cols`.
# `row3` must already exist as the parent container (not created in this snippet).
tree = tb.Treeview(row3, columns=cols, show="headings")
์ด๋ณ ๊ฑด๊ฐ ๋ถ์์ ํ์ํฉ๋๋ค.
9๏ธโฃ ํด๋ฆฌ์คํฑ ์ ์ ์ฒด๊ณ
def heuristic_score(missing, duplicates, type_issue):
    """Return a risk score from 0 to 100 for one column.

    The score is the sum of three independently capped penalties:
    2 points per missing value (at most 30), 2 points per duplicate
    (at most 30), and a flat 40 points when a type issue is present.

    Args:
        missing: Number of missing values.
        duplicates: Number of duplicates detected.
        type_issue: Truthy when the column has a type mismatch.

    Returns:
        The combined score, never greater than 100.
    """
    missing_penalty = min(30, missing * 2)
    duplicate_penalty = min(30, duplicates * 2)
    type_penalty = 40 if type_issue else 0
    return min(missing_penalty + duplicate_penalty + type_penalty, 100)
| ์ ์ | ์ํ ์์ค | ์งํ |
|---|---|---|
| 0โ30 | ์ ์ | ๐ข |
| 31โ70 | ๋ณดํต | ๐ |
| 71โ100 | ๊ณ ์ํ | ๐ด |
๐ ๋ฐ์ดํฐโํด๋ฆฌ๋ ์์ง
def assess_and_clean(df):
    """Fill missing values in every column of ``df``, modifying it in place.

    Per column:

    * Datetime and timedelta columns are left untouched.
    * If at least one value can be parsed as a number, the column is
      converted with ``pd.to_numeric(errors="coerce")`` and every missing or
      unparseable entry is replaced by the column mean.
    * Otherwise the column is converted to the pandas ``"string"`` dtype and
      missing entries are replaced by the most frequent value (the mode).
    * A column with no non-missing values is left unchanged, because there
      is nothing to impute from.

    Args:
        df: The DataFrame to clean. Columns are reassigned on this object.

    Returns:
        None.
    """
    for col in df.columns:
        series = df[col]

        # pd.to_numeric converts datetime64/timedelta64 values to their
        # integer representation, which would destroy date columns.
        if (pd.api.types.is_datetime64_any_dtype(series)
                or pd.api.types.is_timedelta64_dtype(series)):
            continue

        # ---------- Numeric columns ----------
        coerced = pd.to_numeric(series, errors="coerce")
        if coerced.notna().any():  # At least one numeric value
            df[col] = coerced.fillna(coerced.mean())
            continue

        # ---------- Text columns ----------
        modes = series.mode()
        if modes.empty:
            # All values are missing: mode() is empty, so indexing it would
            # raise. Leave the column as it is.
            continue
        # str() keeps the fill value compatible with the "string" dtype.
        df[col] = series.astype("string").fillna(str(modes.iloc[0]))
ํต์ฌ ์์
- Numeric columns โ ์ซ์๋ก ๊ฐ์ ๋ณํํ๊ณ , ๊ฒฐ์ธก๊ฐ์ ํด๋น ์ด์ ํ๊ท ์ผ๋ก ์ฑ์๋๋ค.
- Text columns โ ๋ฌธ์์ด ํ์ ์ผ๋ก ๋ณํํ๊ณ , ๊ฒฐ์ธก๊ฐ์ ๊ฐ์ฅ ๋น๋ฒํ๊ฒ ๋ํ๋๋ ๊ฐ(๋ชจ๋)์ผ๋ก ์ฑ์๋๋ค.
๋ค์ ๋จ๊ณ (๋ฐ์ท๋ณธ์ ํ์๋์ง ์์)
- ํด๋ฆฌ์คํฑ ์ ์์ ์ด๋ฆ ๋ณ๊ฒฝ ์ ์์ Treeview์ ์ฑ์ ๋ฃ๊ธฐ.
- CLEAN DATA 버튼을 배경 스레드에서 `assess_and_clean`을 실행하도록 연결하기.
- 위에서 정의한 헬퍼 유틸리티를 사용해 Excel, PDF, JSON, TXT 내보내기 기능 구현하기.
- `excelguardian.log`에 적절한 오류 처리와 로깅 추가하기.
ํ๋ณตํ ํด๋ฆฌ๋! ๐
1๏ธโฃ ์ค๋ ๋ ๊ธฐ๋ฐ ์ ๋ฆฌ ์คํ
# Run the cleanup on a background thread so the GUI event loop is not blocked.
# daemon=True means this thread does not keep the process alive at exit.
# `run_cleanup` is not defined in this snippet; it must exist elsewhere.
threading.Thread(
target=run_cleanup,
daemon=True
).start()
์ ์ค๋ ๋ฉ์ธ๊ฐ?
- UI๊ฐ ๋ฐ์์ฑ์ ์ ์งํฉ๋๋ค
- ๋์ฉ๋ Excel ํ์ผ์์ ๋ฉ์ถค์ ๋ฐฉ์งํฉ๋๋ค
2๏ธโฃ ์์์ด ํฌํจ๋ Excel ๋ด๋ณด๋ด๊ธฐ
# Build Excel formula strings over column A, from row 2 through the
# worksheet's last used row. `ws` (the worksheet) is not defined in this
# snippet, and the strings still have to be written into cells elsewhere.
sum_formula = f"=SUM(A2:A{ws.max_row})"
mean_formula = f"=AVERAGE(A2:A{ws.max_row})"
์๋์ผ๋ก ์ถ๊ฐ๋ฉ๋๋ค:
- SUM
- AVERAGE
3๏ธโฃ ์กฐ๊ฑด๋ถ ์์
# Solid fill using colour FF9999, intended for highlighting cells.
# NOTE: this snippet only creates `fill`; it is not assigned to a cell here.
fill = PatternFill(start_color="FF9999", fill_type="solid")
# Make the cell's text bold. `cell` is not defined in this snippet.
cell.font = Font(bold=True)
์ํ๋๊ฐ ๋์ ์ด์:
- ๊ฐ์กฐ ํ์ ๐ด
- ๊ฐ๋ ์ฑ์ ์ํด ๊ตต๊ฒ ํ์
4๏ธโฃ PDF ๋ณด๊ณ ์ ๋ด๋ณด๋ด๊ธฐ
def score_color(score):
    """Return the report colour for a heuristic risk score.

    Args:
        score: Risk score to classify.

    Returns:
        ``red`` for scores of 71 and above, ``orange`` for scores from 31
        up to (but not including) 71, and ``green`` for anything lower.
    """
    if score >= 71:
        return red
    if score >= 31:
        return orange
    return green
๋ค์ค ํ์ด์ง PDF ๊ฐ์ฌ ๋ณด๊ณ ์๋ฅผ ๋ค์๊ณผ ๊ฐ์ด ์์ฑํฉ๋๋ค:
- ์์์ผ๋ก ๊ตฌ๋ถ๋ ์ ์
- ์ด ์์ฝ
- ํ์ด์ง ๋ฒํธ
5๏ธโฃ ์ ๋ณด ๋ฐ ๋์๋ง ์ฐฝ
# Bold section heading for the help window. `frame` is not defined in this
# snippet, and the label is not placed with a geometry manager here.
tb.Label(frame, text="How to Use", font=("Segoe UI", 12, "bold"))
Provides:
- Feature overview
- Usage steps
- Developer info
๐ ์ต์ข ๊ฒฐ๊ณผ
์ด์ ๋ค์์ ๊ฐ๊ฒ ๋ฉ๋๋ค:
- ์ ๋ฌธ์ ์ธ Excel ํด๋ฆฌ๋
- ๋ฐ์คํฌํฑ GUI
- ํด๋ฆฌ์คํฑ ์ ์ ์์คํ
- ๋ค์ค ํ์ ๋ด๋ณด๋ด๊ธฐ
๐ ๋ค์ ๊ฐ์ ์ฌํญ
- ์ํธ๋ณ ์ ํ ์ถ๊ฐ
- ์ฐจํธ ์ถ๊ฐ (๋ฐ์ดํฐ ๊ฑด๊ฐ ์ถ์ธ)
- ์ฌ์ฉ์ ํ๋ฆฌ์ ์ ์ฅ
- .exe 파일로 패키징
