Files
PythonApp/dataProcess/dataProcess_sightml_V1.py

811 lines
31 KiB
Python
Raw Normal View History

2026-02-02 15:19:30 +08:00
import pandas as pd
import tkinter as tk
from tkinter import filedialog
import os
from datetime import datetime
import numpy as np
class DataProcessor:
def __init__(self):
self.data = None
self.filename = None
self.file_path = None
self.file_dir = None
self.processing_start_time = None
def select_file(self):
"""手动选择数据文件"""
print("🔍 打开文件选择对话框...")
root = tk.Tk()
root.withdraw()
self.file_path = filedialog.askopenfilename(
title="选择数据文件",
filetypes=[("Excel files", "*.xlsx"), ("CSV files", "*.csv"), ("All files", "*.*")]
)
if self.file_path:
self.filename = os.path.basename(self.file_path)
self.file_dir = os.path.dirname(self.file_path)
print(f"✅ 已选择文件: {self.filename}")
print(f"📁 文件所在目录: {self.file_dir}")
return True
else:
print("❌ 未选择文件")
return False
def _load_data(self):
"""加载数据文件"""
print("📥 开始加载数据文件...")
try:
if self.file_path.endswith('.csv'):
self.data = pd.read_csv(self.file_path)
print("✅ 成功加载CSV文件")
elif self.file_path.endswith('.xlsx'):
self.data = pd.read_excel(self.file_path)
print("✅ 成功加载Excel文件")
else:
raise ValueError("不支持的文件格式")
print(f"📊 数据文件形状: {self.data.shape}")
print(f"📋 数据列名: {list(self.data.columns)[:10]}...")
# 显示数据预览
print("\n📋 数据预览前3行:")
print(self.data.head(3))
# 显示列数据类型
print("\n📊 列数据类型:")
for col in self.data.columns[:10]:
print(f" {col}: {self.data[col].dtype}")
except Exception as e:
print(f"❌ 加载数据文件时出错: {e}")
raise
def _validate_data(self):
"""验证数据完整性"""
print("🔍 验证数据完整性...")
# 检查必要的测量列
required_measure_columns = ['PAD ID', 'Component ID', 'Height(mil)', 'Volume(%)', 'Area(%)']
missing_measure_columns = [col for col in required_measure_columns if col not in self.data.columns]
if missing_measure_columns:
error_msg = f"❌ 数据文件中缺少必要的测量列: {missing_measure_columns}"
print(error_msg)
raise ValueError(error_msg)
# 检查上下限列
required_limit_columns = ['Height_Low(mil)', 'Height_High(mil)', 'Vol_Min(%)', 'Vol_Max(%)', 'Area_Min(%)',
'Area_Max(%)']
missing_limit_columns = [col for col in required_limit_columns if col not in self.data.columns]
if missing_limit_columns:
error_msg = f"❌ 数据文件中缺少必要的上下限列: {missing_limit_columns}"
print(error_msg)
raise ValueError(error_msg)
print("✅ 数据验证通过")
# 检查数据是否存在空值
all_columns = required_measure_columns + required_limit_columns
null_counts = self.data[all_columns].isnull().sum()
if null_counts.any():
print(f"⚠️ 数据中存在空值:")
for col, count in null_counts[null_counts > 0].items():
print(f" {col}: {count} 个空值")
else:
print("✅ 所有必需列都没有空值")
# 显示数据统计信息
print("\n📊 数据统计信息:")
for col in required_measure_columns:
if col in self.data.columns:
# 检查列的数据类型,针对不同类型使用不同的格式化方式
if pd.api.types.is_numeric_dtype(self.data[col]):
valid_count = self.data[col].count()
if valid_count > 0:
min_val = self.data[col].min()
max_val = self.data[col].max()
print(f" {col}: {valid_count} 个有效值, 范围 {min_val:.4f} - {max_val:.4f}")
else:
print(f" {col}: 0 个有效值")
else:
# 非数值型列:显示唯一值和示例
unique_count = self.data[col].nunique()
sample_values = self.data[col].dropna().head(3).tolist()
print(
f" {col}: {self.data[col].count()} 个有效值, {unique_count} 个唯一值, 示例: {sample_values}")
# 检查并转换数据类型
print("\n🔄 数据类型检查与转换:")
numeric_columns = ['Height(mil)', 'Volume(%)', 'Area(%)',
'Height_Low(mil)', 'Height_High(mil)',
'Vol_Min(%)', 'Vol_Max(%)', 'Area_Min(%)', 'Area_Max(%)']
for col in numeric_columns:
if col in self.data.columns:
if not pd.api.types.is_numeric_dtype(self.data[col]):
try:
# 尝试转换为数值类型
original_count = self.data[col].count()
self.data[col] = pd.to_numeric(self.data[col], errors='coerce')
converted_count = self.data[col].count()
lost_data = original_count - converted_count
if lost_data > 0:
print(f" ⚠️ {col}: 转换后丢失 {lost_data} 个非数值数据")
else:
print(f"{col}: 成功转换为数值类型")
except Exception as e:
print(f"{col}: 类型转换失败 - {e}")
else:
valid_count = self.data[col].count()
print(f"{col}: 已经是数值类型, {valid_count} 个有效值")
def _print_progress(self, message, level=1):
"""打印进度信息,支持分级显示"""
indent = " " * level
timestamp = datetime.now().strftime("%H:%M:%S")
print(f"{timestamp} {indent}{message}")
def generate_report(self):
"""生成统计报告"""
if self.data is None:
raise ValueError("请先选择数据文件")
try:
self.processing_start_time = datetime.now()
print(f"\n🚀 开始生成报告 - {self.processing_start_time.strftime('%Y-%m-%d %H:%M:%S')}")
# 验证数据
self._validate_data()
self._print_progress("开始数据处理...", 1)
# 创建分组键
self._print_progress("创建分组键...", 2)
# 确保PAD ID和Component ID都是字符串类型
self.data['PAD ID'] = self.data['PAD ID'].astype(str)
self.data['Component ID'] = self.data['Component ID'].astype(str)
self.data['Group_Key'] = self.data['PAD ID'] + '_' + self.data['Component ID']
group_count = self.data['Group_Key'].nunique()
self._print_progress(f"共发现 {group_count} 个分组", 2)
# 显示分组信息
group_info = self.data['Group_Key'].value_counts()
self._print_progress(f"分组数据量统计:", 2)
for i, (group, count) in enumerate(group_info.head(5).items()):
self._print_progress(f" {group}: {count} 个数据点", 3)
if len(group_info) > 5:
self._print_progress(f" ... 还有 {len(group_info) - 5} 个分组", 3)
# 检查数值列是否存在NaN值
numeric_columns = ['Height(mil)', 'Volume(%)', 'Area(%)']
for col in numeric_columns:
if col in self.data.columns:
nan_count = self.data[col].isna().sum()
if nan_count > 0:
self._print_progress(f"⚠️ {col}{nan_count} 个空值,将在统计计算中排除", 3)
# 计算统计信息
self._print_progress("计算基本统计信息...", 2)
# 确保数值列没有无穷大值
for col in numeric_columns:
if col in self.data.columns:
inf_count = np.isinf(self.data[col]).sum()
if inf_count > 0:
self._print_progress(f"⚠️ {col}{inf_count} 个无穷大值将替换为NaN", 3)
self.data[col] = self.data[col].replace([np.inf, -np.inf], np.nan)
stats = self.data.groupby('Group_Key').agg({
'Height(mil)': ['min', 'max', 'mean', 'std'],
'Volume(%)': ['min', 'max', 'mean', 'std'],
'Area(%)': ['min', 'max', 'mean', 'std']
}).round(4)
# 重命名列
stats.columns = [
'Height_Measured_Min(mil)', 'Height_Measured_Max(mil)', 'Height_Mean(mil)', 'Height_Std(mil)',
'Volume_Measured_Min(%)', 'Volume_Measured_Max(%)', 'Volume_Mean(%)', 'Volume_Std(%)',
'Area_Measured_Min(%)', 'Area_Measured_Max(%)', 'Area_Mean(%)', 'Area_Std(%)'
]
self._print_progress("基本统计信息计算完成", 2)
# 获取上下限信息
self._print_progress("获取预设上下限信息...", 2)
limits = self.data.groupby('Group_Key').agg({
'Height_Low(mil)': 'first',
'Height_High(mil)': 'first',
'Vol_Min(%)': 'first',
'Vol_Max(%)': 'first',
'Area_Min(%)': 'first',
'Area_Max(%)': 'first'
}).round(4)
# 合并统计信息和上下限信息
stats = pd.concat([stats, limits], axis=1)
self._print_progress("上下限信息获取完成", 2)
# 计算CPK
self._print_progress("开始计算CPK值...", 2)
stats = self._calculate_cpk(stats)
# 分析CPK结果
cpk_analysis = self._analyze_cpk_results(stats)
self._print_progress("CPK分析完成", 2)
self._print_cpk_summary(cpk_analysis)
# 生成HTML报告
self._print_progress("生成HTML报告...", 2)
report_path = self._create_html_report(stats, cpk_analysis)
self._print_progress("HTML报告生成完成", 2)
# 计算处理时间
processing_time = datetime.now() - self.processing_start_time
self._print_progress(f"总处理时间: {processing_time.total_seconds():.2f}", 1)
return report_path
except Exception as e:
print(f"❌ 生成报告过程中出错: {e}")
import traceback
print(f"详细错误信息:")
traceback.print_exc()
raise
def _analyze_cpk_results(self, stats):
"""分析CPK结果"""
cpk_analysis = {
'total_groups': len(stats),
'cpk_status': {'Height': {}, 'Volume': {}, 'Area': {}},
'problematic_groups': []
}
for feature in ['Height', 'Volume', 'Area']:
cpk_col = f'{feature}_Cpk'
if cpk_col not in stats.columns:
continue
valid_cpk = stats[cpk_col].dropna()
total_valid = len(valid_cpk)
cpk_analysis['cpk_status'][feature] = {
'total': total_valid,
'excellent': len(valid_cpk[valid_cpk >= 1.33]) if total_valid > 0 else 0,
'acceptable': len(valid_cpk[(valid_cpk >= 1.0) & (valid_cpk < 1.33)]) if total_valid > 0 else 0,
'poor': len(valid_cpk[valid_cpk < 1.0]) if total_valid > 0 else 0,
'invalid': len(stats) - total_valid
}
# 识别有问题的分组任意特征的CPK < 1.0
for group_key, row in stats.iterrows():
problems = []
for feature in ['Height', 'Volume', 'Area']:
cpk_col = f'{feature}_Cpk'
if cpk_col in stats.columns and not pd.isna(row[cpk_col]):
if row[cpk_col] < 1.0:
problems.append(f"{feature}: {row[cpk_col]:.4f}")
if problems:
cpk_analysis['problematic_groups'].append({
'group_key': group_key,
'problems': problems
})
return cpk_analysis
def _print_cpk_summary(self, cpk_analysis):
"""打印CPK结果摘要"""
print("\n📈 CPK分析结果摘要:")
print("=" * 60)
for feature, status in cpk_analysis['cpk_status'].items():
total = status['total']
if total == 0:
print(f"\n{feature}: 无有效CPK数据")
continue
print(f"\n{feature}:")
excellent_pct = (status['excellent'] / total * 100) if total > 0 else 0
acceptable_pct = (status['acceptable'] / total * 100) if total > 0 else 0
poor_pct = (status['poor'] / total * 100) if total > 0 else 0
print(f" ✅ 优秀 (CPK ≥ 1.33): {status['excellent']}/{total} ({excellent_pct:.1f}%)")
print(f" ⚠️ 合格 (1.0 ≤ CPK < 1.33): {status['acceptable']}/{total} ({acceptable_pct:.1f}%)")
print(f" ❌ 不合格 (CPK < 1.0): {status['poor']}/{total} ({poor_pct:.1f}%)")
print(f" ❓ 无法计算: {status['invalid']}")
if cpk_analysis['problematic_groups']:
print(f"\n⚠️ 发现 {len(cpk_analysis['problematic_groups'])} 个有问题分组:")
for i, group in enumerate(cpk_analysis['problematic_groups'][:10]):
print(f" {i + 1}. {group['group_key']}: {', '.join(group['problems'])}")
if len(cpk_analysis['problematic_groups']) > 10:
print(f" ... 还有 {len(cpk_analysis['problematic_groups']) - 10} 个问题分组")
else:
print("\n✅ 所有分组的CPK都在合格范围内")
print("=" * 60)
def _calculate_cpk(self, stats):
"""计算CPK值"""
self._print_progress("详细计算CPK值...", 3)
def calculate_single_cpk(mean, std, usl, lsl):
"""计算单个特征的CPK"""
if pd.isna(mean) or pd.isna(std) or std == 0:
return np.nan
if pd.isna(usl) or pd.isna(lsl):
return np.nan
try:
cpu = (usl - mean) / (3 * std) if usl != float('inf') else float('inf')
cpl = (mean - lsl) / (3 * std) if lsl != float('-inf') else float('inf')
if cpu == float('inf') and cpl == float('inf'):
return np.nan
elif cpu == float('inf'):
return cpl
elif cpl == float('inf'):
return cpu
else:
return min(cpu, cpl)
except (ZeroDivisionError, TypeError):
return np.nan
# 计算每个特征的CPK
cpk_results = []
total_groups = len(stats)
for idx, row in stats.iterrows():
if len(cpk_results) % 100 == 0 and total_groups > 100:
self._print_progress(f"计算第 {len(cpk_results) + 1} 个分组的CPK...", 4)
# Height CPK
height_cpk = calculate_single_cpk(
row.get('Height_Mean(mil)', np.nan),
row.get('Height_Std(mil)', np.nan),
row.get('Height_High(mil)', np.nan),
row.get('Height_Low(mil)', np.nan)
)
# Volume CPK
volume_cpk = calculate_single_cpk(
row.get('Volume_Mean(%)', np.nan),
row.get('Volume_Std(%)', np.nan),
row.get('Vol_Max(%)', np.nan),
row.get('Vol_Min(%)', np.nan)
)
# Area CPK
area_cpk = calculate_single_cpk(
row.get('Area_Mean(%)', np.nan),
row.get('Area_Std(%)', np.nan),
row.get('Area_Max(%)', np.nan),
row.get('Area_Min(%)', np.nan)
)
cpk_results.append({
'Height_Cpk': round(height_cpk, 4) if not pd.isna(height_cpk) else np.nan,
'Volume_Cpk': round(volume_cpk, 4) if not pd.isna(volume_cpk) else np.nan,
'Area_Cpk': round(area_cpk, 4) if not pd.isna(area_cpk) else np.nan
})
# 将CPK结果添加到统计数据中
cpk_df = pd.DataFrame(cpk_results, index=stats.index)
stats = pd.concat([stats, cpk_df], axis=1)
self._print_progress(f"所有 {len(stats)} 个分组CPK计算完成", 3)
return stats
def _get_cpk_status_class(self, cpk_value):
"""根据CPK值返回状态类别"""
if pd.isna(cpk_value):
return 'cpk-invalid'
elif cpk_value >= 1.33:
return 'cpk-excellent'
elif cpk_value >= 1.0:
return 'cpk-acceptable'
else:
return 'cpk-poor'
def _create_html_report(self, stats, cpk_analysis):
"""创建完整的HTML报告"""
self._print_progress("构建HTML报告内容...", 3)
total_groups = len(stats)
# 完整的HTML模板
html_content = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>数据统计报告 - {self.filename}</title>
<style>
:root {{
--color-excellent: #4CAF50;
--color-acceptable: #FFC107;
--color-poor: #F44336;
--color-invalid: #9E9E9E;
}}
body {{
font-family: 'Segoe UI', Arial, sans-serif;
margin: 20px;
line-height: 1.6;
background-color: #f8f9fa;
}}
.container {{
max-width: 95%;
margin: 0 auto;
background: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
text-align: center;
}}
h2 {{
color: #34495e;
margin-top: 30px;
padding: 15px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 5px;
}}
.summary {{
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin-bottom: 30px;
}}
.cpk-dashboard {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 20px;
margin: 20px 0;
}}
.cpk-card {{
background: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
text-align: center;
}}
.cpk-excellent {{ background-color: var(--color-excellent); color: white; }}
.cpk-acceptable {{ background-color: var(--color-acceptable); color: black; }}
.cpk-poor {{ background-color: var(--color-poor); color: white; }}
.cpk-invalid {{ background-color: var(--color-invalid); color: white; }}
table {{
border-collapse: collapse;
width: 100%;
margin-top: 20px;
font-size: 12px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
background: white;
}}
th, td {{
border: 1px solid #ddd;
padding: 12px;
text-align: center;
}}
th {{
background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
color: white;
font-weight: bold;
position: sticky;
top: 0;
}}
tr:nth-child(even) {{ background-color: #f8f9fa; }}
tr:hover {{ background-color: #e3f2fd; }}
.limits {{
background-color: #e8f5e8;
font-weight: bold;
color: #2e7d32;
}}
.measured {{
background-color: #fff3cd;
color: #856404;
}}
.problematic-row {{
background-color: #ffebee !important;
border-left: 4px solid var(--color-poor);
}}
.warning-box {{
background: #fff3cd;
border-left: 4px solid #ffc107;
padding: 15px;
margin: 20px 0;
border-radius: 5px;
}}
.chart-container {{
margin: 20px 0;
padding: 20px;
background: white;
border-radius: 10px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}}
.legend {{
display: flex;
justify-content: center;
gap: 20px;
margin: 20px 0;
flex-wrap: wrap;
}}
.legend-item {{
display: flex;
align-items: center;
gap: 5px;
padding: 5px 10px;
border-radius: 3px;
}}
.na {{ color: #999; font-style: italic; }}
</style>
</head>
<body>
<div class="container">
<h1>📊 数据统计报告 - {self.filename}</h1>
<p><strong>生成时间:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
<p><strong>输入文件:</strong> {self.filename}</p>
<div class="summary">
<h2>📈 报告摘要</h2>
<p><strong>总分组数量:</strong> {total_groups}</p>
<p><strong>处理时间:</strong> {(datetime.now() - self.processing_start_time).total_seconds():.2f} </p>
</div>
<!-- CPK状态仪表板 -->
<div class="cpk-dashboard">
"""
# 添加CPK状态卡片
for feature, status in cpk_analysis['cpk_status'].items():
total = status['total'] + status['invalid']
if total == 0:
continue
html_content += f"""
<div class="cpk-card">
<h3>{feature} CPK状态</h3>
<div style="font-size: 2em; font-weight: bold; margin: 10px 0;">
{status['excellent'] + status['acceptable']}/{total}
</div>
<p>合格率: {(status['excellent'] + status['acceptable']) / total * 100:.1f}%</p>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px;">
<span class="legend-item cpk-excellent">优秀: {status['excellent']}</span>
<span class="legend-item cpk-acceptable">合格: {status['acceptable']}</span>
<span class="legend-item cpk-poor">不合格: {status['poor']}</span>
<span class="legend-item cpk-invalid">无效: {status['invalid']}</span>
</div>
</div>
"""
html_content += f"""
</div>
<!-- 问题分组警告 -->
{f'<div class="warning-box"><h3>⚠️ 发现 {len(cpk_analysis["problematic_groups"])} 个问题分组</h3><p>以下分组的CPK值低于1.0,需要重点关注</p></div>' if cpk_analysis['problematic_groups'] else ''}
<h2>📋 详细统计数据</h2>
<div class="legend">
<span class="legend-item" style="background-color: #e8f5e8;">预设上下限</span>
<span class="legend-item" style="background-color: #fff3cd;">实测值</span>
<span class="legend-item cpk-excellent">CPK 1.33</span>
<span class="legend-item cpk-acceptable">1.0 CPK < 1.33</span>
<span class="legend-item cpk-poor">CPK < 1.0</span>
</div>
<div style="overflow-x: auto;">
<table>
<thead>
<tr>
<th rowspan="2">分组标识</th>
<th colspan="7">Height(mil)</th>
<th colspan="7">Volume(%)</th>
<th colspan="7">Area(%)</th>
</tr>
<tr>
<!-- Height列标题 -->
<th class="limits">预设下限</th>
<th class="limits">预设上限</th>
<th class="measured">实测最小值</th>
<th class="measured">实测最大值</th>
<th>平均值</th>
<th>标准差</th>
<th>CPK</th>
<!-- Volume列标题 -->
<th class="limits">预设下限</th>
<th class="limits">预设上限</th>
<th class="measured">实测最小值</th>
<th class="measured">实测最大值</th>
<th>平均值</th>
<th>标准差</th>
<th>CPK</th>
<!-- Area列标题 -->
<th class="limits">预设下限</th>
<th class="limits">预设上限</th>
<th class="measured">实测最小值</th>
<th class="measured">实测最大值</th>
<th>平均值</th>
<th>标准差</th>
<th>CPK</th>
</tr>
</thead>
<tbody>
"""
# 生成表格行数据的辅助函数
def format_value(value):
if pd.isna(value):
return '<span class="na">N/A</span>'
elif isinstance(value, (int, float)):
return f"{value:.4f}"
else:
return str(value)
# 用于检查列是否存在的辅助函数
def safe_get_value(row, column_name):
"""安全获取列值如果列不存在返回N/A"""
if column_name in row.index:
return row[column_name]
else:
return np.nan
for group_key, row in stats.iterrows():
# 检查是否为问题分组
is_problematic = any(problem['group_key'] == group_key for problem in cpk_analysis['problematic_groups'])
row_class = 'class="problematic-row"' if is_problematic else ''
html_content += f"""
<tr {row_class}>
<td><strong>{group_key}</strong>{' ⚠️' if is_problematic else ''}</td>
"""
# 为每个特征生成列
for feature in ['Height', 'Volume', 'Area']:
cpk_value = safe_get_value(row, f'{feature}_Cpk')
cpk_class = self._get_cpk_status_class(cpk_value)
# 为不同特征设置正确的列名
if feature == 'Height':
lower_limit_col = 'Height_Low(mil)'
upper_limit_col = 'Height_High(mil)'
measured_min_col = 'Height_Measured_Min(mil)'
measured_max_col = 'Height_Measured_Max(mil)'
mean_col = 'Height_Mean(mil)'
std_col = 'Height_Std(mil)'
else:
lower_limit_col = f"{'Vol' if feature == 'Volume' else 'Area'}_Min(%)" # 修正Volume使用Vol_Min(%)Area使用Area_Min(%)
upper_limit_col = f"{'Vol' if feature == 'Volume' else 'Area'}_Max(%)" # 修正Volume使用Vol_Max(%)Area使用Area_Max(%)
measured_min_col = f'{feature}_Measured_Min(%)'
measured_max_col = f'{feature}_Measured_Max(%)'
mean_col = f'{feature}_Mean(%)'
std_col = f'{feature}_Std(%)'
html_content += f"""
<!-- {feature}数据 -->
<td class="limits">{format_value(safe_get_value(row, lower_limit_col))}</td>
<td class="limits">{format_value(safe_get_value(row, upper_limit_col))}</td>
<td class="measured">{format_value(safe_get_value(row, measured_min_col))}</td>
<td class="measured">{format_value(safe_get_value(row, measured_max_col))}</td>
<td>{format_value(safe_get_value(row, mean_col))}</td>
<td>{format_value(safe_get_value(row, std_col))}</td>
<td class="{cpk_class}">{format_value(cpk_value)}</td>
"""
html_content += """
</tr>"""
html_content += """
</tbody>
</table>
</div>
<div class="chart-container">
<h2>📊 CPK状态分布</h2>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px;">
"""
# 添加简单的CPK分布图表
for feature, status in cpk_analysis['cpk_status'].items():
total = status['total'] + status['invalid']
if total == 0:
continue
html_content += f"""
<div>
<h3>{feature} CPK分布</h3>
<div style="background: #f8f9fa; padding: 20px; border-radius: 5px;">
<div style="display: flex; height: 30px; margin: 10px 0; border-radius: 5px; overflow: hidden;">
<div style="background: var(--color-excellent); width: {status['excellent'] / total * 100}%;"></div>
<div style="background: var(--color-acceptable); width: {status['acceptable'] / total * 100}%;"></div>
<div style="background: var(--color-poor); width: {status['poor'] / total * 100}%;"></div>
<div style="background: var(--color-invalid); width: {status['invalid'] / total * 100}%;"></div>
</div>
<div style="text-align: center;">
<small>优秀 {status['excellent']} | 合格 {status['acceptable']} | 不合格 {status['poor']} | 无效 {status['invalid']}</small>
</div>
</div>
</div>
"""
html_content += """
</div>
</div>
</div>
</body>
</html>"""
# 保存报告
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
report_filename = f"{os.path.splitext(self.filename)[0]}_report_{timestamp}.html"
report_path = os.path.join(self.file_dir, report_filename)
self._print_progress(f"保存报告到: {report_path}", 3)
with open(report_path, 'w', encoding='utf-8') as f:
f.write(html_content)
return report_path
def main():
"""主函数"""
print("=" * 60)
print("🚀 数据统计报告生成程序 - Volume上下限修复版")
print("=" * 60)
processor = DataProcessor()
try:
if processor.select_file():
processor._load_data()
report_path = processor.generate_report()
print("\n" + "=" * 60)
print("✅ 程序执行完成")
print(f"📄 统计报告生成成功: {report_path}")
print("=" * 60)
else:
print("❌ 未选择文件,程序退出")
except Exception as e:
print(f"\n❌ 程序执行失败: {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()