1061 lines
45 KiB
Python
1061 lines
45 KiB
Python
|
|
import pandas as pd
|
|||
|
|
import tkinter as tk
|
|||
|
|
from tkinter import filedialog
|
|||
|
|
import os
|
|||
|
|
from datetime import datetime
|
|||
|
|
import numpy as np
|
|||
|
|
import matplotlib.pyplot as plt
|
|||
|
|
import seaborn as sns
|
|||
|
|
from io import BytesIO
|
|||
|
|
import base64
|
|||
|
|
import multiprocessing as mp
|
|||
|
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|||
|
|
import time
|
|||
|
|
import json
|
|||
|
|
import traceback
|
|||
|
|
|
|||
|
|
# Select a font that can render Chinese labels (SimHei first, DejaVu Sans as
# the fallback) and turn off matplotlib's Unicode minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _add_reference_lines(ax, usl, lsl, mean=None, *, vertical):
    """Draw the USL/LSL (and optionally the mean) reference lines on *ax*."""
    draw = ax.axvline if vertical else ax.axhline
    draw(usl, color='red', linestyle='--', label=f'上限: {usl:.2f}', linewidth=1)
    draw(lsl, color='green', linestyle='--', label=f'下限: {lsl:.2f}', linewidth=1)
    if mean is not None:
        draw(mean, color='orange', linestyle='-',
             label=f'均值: {mean:.2f}', linewidth=1.5)


def _render_feature_chart(group_key, feature_name, feature_data, usl, lsl):
    """Render the 2x2 chart page for one feature and return it as base64 PNG."""
    mean = feature_data.mean()
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    try:
        fig.suptitle(f'{group_key} - {feature_name} 统计分析', fontsize=14)
        hist_ax, box_ax = axes[0, 0], axes[0, 1]
        run_ax, kde_ax = axes[1, 0], axes[1, 1]

        # 1. Histogram
        hist_ax.hist(feature_data, bins=15, alpha=0.7, color='skyblue', edgecolor='black')
        _add_reference_lines(hist_ax, usl, lsl, mean, vertical=True)
        hist_ax.set_title('直方图')
        hist_ax.set_xlabel(feature_name)
        hist_ax.set_ylabel('频数')

        # 2. Box plot (limits only, no mean line)
        sns.boxplot(y=feature_data, ax=box_ax, color='lightblue')
        _add_reference_lines(box_ax, usl, lsl, vertical=False)
        box_ax.set_title('箱线图')
        box_ax.set_ylabel(feature_name)

        # 3. Run chart: values in their original order
        run_ax.plot(range(len(feature_data)), feature_data, 'o-', color='blue',
                    alpha=0.7, markersize=3, linewidth=1)
        _add_reference_lines(run_ax, usl, lsl, mean, vertical=False)
        run_ax.set_title('序列图')
        run_ax.set_xlabel('数据点序号')
        run_ax.set_ylabel(feature_name)

        # 4. Kernel density estimate
        sns.kdeplot(feature_data, ax=kde_ax, color='blue', fill=True, alpha=0.5)
        _add_reference_lines(kde_ax, usl, lsl, mean, vertical=True)
        kde_ax.set_title('概率密度图')
        kde_ax.set_xlabel(feature_name)
        kde_ax.set_ylabel('密度')

        for ax in (hist_ax, box_ax, run_ax, kde_ax):
            ax.legend(fontsize=8)
            ax.grid(True, alpha=0.3)

        fig.tight_layout()

        # Serialise the figure to an in-memory PNG and base64-encode it.
        buffer = BytesIO()
        fig.savefig(buffer, format='png', dpi=80, bbox_inches='tight')
        return base64.b64encode(buffer.getvalue()).decode()
    finally:
        # Always release the figure, even when drawing fails, so figures do
        # not pile up in a worker that handles several tasks.
        plt.close(fig)


def plot_worker(args):
    """Worker-process entry point: render the charts of a single group.

    Args:
        args: A ``(group_key, feature_data_dict, limits_dict)`` tuple.
            ``feature_data_dict`` maps a feature name to its measurements
            (``len()`` and ``.mean()`` are called on each value, so a pandas
            Series is presumably expected -- confirm against the caller).
            ``limits_dict`` maps the same feature name to ``(usl, lsl)``.

    Returns:
        ``(group_key, results)``. ``results`` maps every feature name to a
        base64-encoded PNG, or to ``""`` when the feature has no data or its
        chart could not be rendered. If the task itself is malformed the
        result is ``(group_key, {})``, with ``group_key`` left as ``None``
        when it could not be unpacked.
    """
    group_key = None  # defined up-front so the error handler can always print it
    try:
        group_key, feature_data_dict, limits_dict = args

        # Re-apply the matplotlib configuration inside each worker process.
        plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
        plt.rcParams['axes.unicode_minus'] = False

        results = {}
        for feature_name, feature_data in feature_data_dict.items():
            if len(feature_data) == 0:
                results[feature_name] = ""
                continue

            # Isolate failures per feature: one bad chart must not discard
            # the charts already rendered for the other features.
            try:
                usl, lsl = limits_dict[feature_name]
                results[feature_name] = _render_feature_chart(
                    group_key, feature_name, feature_data, usl, lsl)
            except Exception as e:
                print(f"❌ 图表生成失败 {group_key} - {feature_name}: {e}")
                print(f" 错误详情: {traceback.format_exc()}")
                results[feature_name] = ""

        return group_key, results

    except Exception as e:
        print(f"❌ 图表生成失败 {group_key}: {e}")
        print(f" 错误详情: {traceback.format_exc()}")
        return group_key, {}
|
|||
|
|
|
|||
|
|
|
|||
|
|
class DataProcessor:
|
|||
|
|
def __init__(self):
|
|||
|
|
self.data = None
|
|||
|
|
self.filename = None
|
|||
|
|
self.file_path = None
|
|||
|
|
self.file_dir = None # 新增:存储输入文件所在目录
|
|||
|
|
self.stats = None
|
|||
|
|
self.output_dir = None
|
|||
|
|
self.progress_file = None
|
|||
|
|
|
|||
|
|
def select_file(self):
    """Let the user pick the input data file through a Tk file dialog.

    On success ``file_path``, ``filename`` and ``file_dir`` are updated.

    Returns:
        True when a file was chosen, False when the dialog was cancelled.
    """
    print("打开文件选择对话框...")
    root = tk.Tk()
    root.withdraw()  # only the dialog should be visible, not the root window
    try:
        self.file_path = filedialog.askopenfilename(
            title="选择数据文件",
            filetypes=[("Excel files", "*.xlsx"), ("CSV files", "*.csv"), ("All files", "*.*")]
        )
    finally:
        # The hidden root window was previously never released.
        root.destroy()

    if not self.file_path:
        print("❌ 未选择文件")
        return False

    self.filename = os.path.basename(self.file_path)
    self.file_dir = os.path.dirname(self.file_path)  # directory of the input file
    print(f"✅ 已选择文件: {self.filename}")
    print(f"📁 文件所在目录: {self.file_dir}")
    return True
|
|||
|
|
|
|||
|
|
def _load_data(self):
|
|||
|
|
"""加载数据文件"""
|
|||
|
|
print("开始加载数据文件...")
|
|||
|
|
try:
|
|||
|
|
if self.file_path.endswith('.csv'):
|
|||
|
|
self.data = pd.read_csv(self.file_path)
|
|||
|
|
print("✅ 成功加载CSV文件")
|
|||
|
|
elif self.file_path.endswith('.xlsx'):
|
|||
|
|
self.data = pd.read_excel(self.file_path)
|
|||
|
|
print("✅ 成功加载Excel文件")
|
|||
|
|
else:
|
|||
|
|
raise ValueError("不支持的文件格式")
|
|||
|
|
|
|||
|
|
print(f"📊 数据文件形状: {self.data.shape}")
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"❌ 加载数据文件时出错: {e}")
|
|||
|
|
print(f" 错误详情: {traceback.format_exc()}")
|
|||
|
|
raise
|
|||
|
|
|
|||
|
|
def _validate_data(self):
|
|||
|
|
"""验证数据完整性 - 增强验证:检查上下限列"""
|
|||
|
|
print("验证数据完整性...")
|
|||
|
|
|
|||
|
|
# 检查必要的测量列
|
|||
|
|
required_measure_columns = ['PAD ID', 'Component ID', 'Height(mil)', 'Volume(%)', 'Area(%)']
|
|||
|
|
missing_measure_columns = [col for col in required_measure_columns if col not in self.data.columns]
|
|||
|
|
|
|||
|
|
if missing_measure_columns:
|
|||
|
|
error_msg = f"数据文件中缺少必要的测量列: {missing_measure_columns}"
|
|||
|
|
print(f"❌ {error_msg}")
|
|||
|
|
raise ValueError(error_msg)
|
|||
|
|
|
|||
|
|
# 检查必要的上下限列
|
|||
|
|
required_limit_columns = ['Height_Low(mil)', 'Height_High(mil)',
|
|||
|
|
'Vol_Min(%)', 'Vol_Max(%)',
|
|||
|
|
'Area_Min(%)', 'Area_Max(%)']
|
|||
|
|
missing_limit_columns = [col for col in required_limit_columns if col not in self.data.columns]
|
|||
|
|
|
|||
|
|
if missing_limit_columns:
|
|||
|
|
error_msg = f"数据文件中缺少必要的上下限列: {missing_limit_columns}"
|
|||
|
|
print(f"❌ {error_msg}")
|
|||
|
|
raise ValueError(error_msg)
|
|||
|
|
|
|||
|
|
print("✅ 数据验证通过")
|
|||
|
|
|
|||
|
|
# 检查数据是否存在空值
|
|||
|
|
all_required_columns = required_measure_columns + required_limit_columns
|
|||
|
|
null_counts = self.data[all_required_columns].isnull().sum()
|
|||
|
|
if null_counts.any():
|
|||
|
|
print(f"⚠️ 数据中存在空值 - {null_counts[null_counts > 0].to_dict()}")
|
|||
|
|
|
|||
|
|
def _setup_output_directory(self):
|
|||
|
|
"""设置输出目录"""
|
|||
|
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
|||
|
|
base_name = os.path.splitext(self.filename)[0]
|
|||
|
|
|
|||
|
|
# 优化:输出目录放置在输入文件所在文件夹下
|
|||
|
|
self.output_dir = os.path.join(self.file_dir, f"{base_name}_report_{timestamp}")
|
|||
|
|
|
|||
|
|
# 创建主目录
|
|||
|
|
os.makedirs(self.output_dir, exist_ok=True)
|
|||
|
|
|
|||
|
|
# 创建分组报告子目录
|
|||
|
|
os.makedirs(os.path.join(self.output_dir, 'group_reports'), exist_ok=True)
|
|||
|
|
|
|||
|
|
# 创建进度文件
|
|||
|
|
self.progress_file = os.path.join(self.output_dir, 'progress.json')
|
|||
|
|
|
|||
|
|
print(f"📁 输出目录: {self.output_dir}")
|
|||
|
|
|
|||
|
|
def _save_progress(self, completed_groups=None, current_stage=None):
|
|||
|
|
"""保存处理进度"""
|
|||
|
|
try:
|
|||
|
|
progress = {
|
|||
|
|
'filename': self.filename,
|
|||
|
|
'total_groups': len(self.stats.index) if self.stats is not None else 0,
|
|||
|
|
'completed_groups': completed_groups or [],
|
|||
|
|
'current_stage': current_stage,
|
|||
|
|
'last_update': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
|||
|
|
'input_file_directory': self.file_dir, # 记录输入文件目录
|
|||
|
|
'output_directory': self.output_dir # 记录输出目录
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
with open(self.progress_file, 'w', encoding='utf-8') as f:
|
|||
|
|
json.dump(progress, f, indent=2, ensure_ascii=False)
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"⚠️ 保存进度失败: {e}")
|
|||
|
|
|
|||
|
|
def generate_report(self):
|
|||
|
|
"""生成统计报告 - 分阶段输出"""
|
|||
|
|
if self.data is None:
|
|||
|
|
raise ValueError("请先选择数据文件")
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
# 验证数据
|
|||
|
|
self._validate_data()
|
|||
|
|
|
|||
|
|
# 设置输出目录
|
|||
|
|
self._setup_output_directory()
|
|||
|
|
|
|||
|
|
print("开始数据处理...")
|
|||
|
|
|
|||
|
|
# 创建分组键
|
|||
|
|
self.data['Group_Key'] = self.data['PAD ID'].astype(str) + '_' + self.data['Component ID'].astype(str)
|
|||
|
|
group_count = self.data['Group_Key'].nunique()
|
|||
|
|
print(f"📊 共发现 {group_count} 个分组")
|
|||
|
|
|
|||
|
|
# 阶段1:快速生成基本统计信息和汇总报告
|
|||
|
|
print("\n=== 阶段1: 生成基本统计信息 ===")
|
|||
|
|
|
|||
|
|
# 计算测量数据的统计信息
|
|||
|
|
self.stats = self.data.groupby('Group_Key').agg({
|
|||
|
|
'Height(mil)': ['min', 'max', 'mean', 'std'],
|
|||
|
|
'Volume(%)': ['min', 'max', 'mean', 'std'],
|
|||
|
|
'Area(%)': ['min', 'max', 'mean', 'std']
|
|||
|
|
}).round(4)
|
|||
|
|
|
|||
|
|
# 重命名测量统计列
|
|||
|
|
self.stats.columns = [
|
|||
|
|
'Height_Measured_Min(mil)', 'Height_Measured_Max(mil)', 'Height_Mean(mil)', 'Height_Std(mil)',
|
|||
|
|
'Vol_Measured_Min(%)', 'Vol_Measured_Max(%)', 'Vol_Mean(%)', 'Vol_Std(%)',
|
|||
|
|
'Area_Measured_Min(%)', 'Area_Measured_Max(%)', 'Area_Mean(%)', 'Area_Std(%)'
|
|||
|
|
]
|
|||
|
|
|
|||
|
|
print("基本统计信息计算完成")
|
|||
|
|
|
|||
|
|
# 获取预设的上下限信息
|
|||
|
|
print("获取预设上下限信息...")
|
|||
|
|
limits = self.data.groupby('Group_Key').agg({
|
|||
|
|
'Height_Low(mil)': 'first', # 取第一个值作为该分组的预设下限
|
|||
|
|
'Height_High(mil)': 'first', # 取第一个值作为该分组的预设上限
|
|||
|
|
'Vol_Min(%)': 'first',
|
|||
|
|
'Vol_Max(%)': 'first',
|
|||
|
|
'Area_Min(%)': 'first',
|
|||
|
|
'Area_Max(%)': 'first'
|
|||
|
|
}).round(4)
|
|||
|
|
|
|||
|
|
# 合并统计信息和预设上下限信息
|
|||
|
|
self.stats = pd.concat([self.stats, limits], axis=1)
|
|||
|
|
print("预设上下限信息获取完成")
|
|||
|
|
|
|||
|
|
# 计算CPK - 使用预设的上下限值
|
|||
|
|
print("计算CPK值...")
|
|||
|
|
self.stats = self._calculate_cpk(self.stats)
|
|||
|
|
|
|||
|
|
# 立即生成汇总报告
|
|||
|
|
summary_report_path = self._create_summary_report()
|
|||
|
|
print(f"✅ 汇总报告生成完成: {summary_report_path}")
|
|||
|
|
|
|||
|
|
# 保存Excel
|
|||
|
|
excel_path = self._save_to_excel_advanced()
|
|||
|
|
print(f"✅ Excel文件保存完成: {excel_path}")
|
|||
|
|
|
|||
|
|
# 阶段2:分批生成详细分组报告
|
|||
|
|
print("\n=== 阶段2: 分批生成详细分组报告 ===")
|
|||
|
|
self._generate_group_reports_incremental()
|
|||
|
|
|
|||
|
|
# 阶段3:生成索引文件(可选)
|
|||
|
|
print("\n=== 阶段3: 生成报告索引 ===")
|
|||
|
|
index_path = self._create_report_index()
|
|||
|
|
print(f"✅ 报告索引生成完成: {index_path}")
|
|||
|
|
|
|||
|
|
return summary_report_path
|
|||
|
|
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"❌ 程序执行失败: {e}")
|
|||
|
|
print(f" 错误详情: {traceback.format_exc()}")
|
|||
|
|
# 即使失败,也尝试保存当前进度
|
|||
|
|
if hasattr(self, 'output_dir'):
|
|||
|
|
print(f"📁 当前结果已保存到: {self.output_dir}")
|
|||
|
|
raise
|
|||
|
|
|
|||
|
|
def _create_summary_report(self):
|
|||
|
|
"""创建快速汇总报告(区分预设上下限和实测值)"""
|
|||
|
|
print("生成快速汇总报告...")
|
|||
|
|
|
|||
|
|
# 使用明确的空值检查
|
|||
|
|
if self.stats is None or len(self.stats.index) == 0:
|
|||
|
|
print("⚠️ 统计数据为空,生成空报告")
|
|||
|
|
return self._create_empty_report()
|
|||
|
|
|
|||
|
|
# 将索引转换为列表,避免DataFrame布尔判断问题
|
|||
|
|
stats_index = list(self.stats.index)
|
|||
|
|
total_groups = len(stats_index)
|
|||
|
|
|
|||
|
|
# 安全地检查CPK列是否存在
|
|||
|
|
valid_height_cpk = 0
|
|||
|
|
valid_volume_cpk = 0
|
|||
|
|
valid_area_cpk = 0
|
|||
|
|
|
|||
|
|
if 'Height_Cpk' in self.stats.columns:
|
|||
|
|
valid_height_cpk = self.stats['Height_Cpk'].notna().sum()
|
|||
|
|
if 'Volume_Cpk' in self.stats.columns:
|
|||
|
|
valid_volume_cpk = self.stats['Volume_Cpk'].notna().sum()
|
|||
|
|
if 'Area_Cpk' in self.stats.columns:
|
|||
|
|
valid_area_cpk = self.stats['Area_Cpk'].notna().sum()
|
|||
|
|
|
|||
|
|
html_content = f"""
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html>
|
|||
|
|
<head>
|
|||
|
|
<title>数据统计汇总报告 - {self.filename}</title>
|
|||
|
|
<style>
|
|||
|
|
body {{ font-family: Arial, sans-serif; margin: 20px; }}
|
|||
|
|
h1, h2, h3 {{ color: #333; }}
|
|||
|
|
.summary {{ background-color: #f5f5f5; padding: 15px; border-radius: 5px; margin: 20px 0; }}
|
|||
|
|
table {{ border-collapse: collapse; width: 100%; margin: 10px 0; font-size: 12px; }}
|
|||
|
|
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: center; }}
|
|||
|
|
th {{ background-color: #4CAF50; color: white; }}
|
|||
|
|
.limits {{ background-color: #e8f5e8; font-weight: bold; }}
|
|||
|
|
.measured {{ background-color: #fff3cd; }}
|
|||
|
|
.info-box {{ background-color: #e7f3ff; padding: 15px; border-radius: 5px; margin: 15px 0; }}
|
|||
|
|
.nav-links {{ margin: 20px 0; }}
|
|||
|
|
.nav-links a {{ margin-right: 15px; text-decoration: none; color: #0066cc; }}
|
|||
|
|
.progress {{ background-color: #fff3cd; padding: 10px; border: 1px solid #ffeaa7; border-radius: 5px; margin: 10px 0; }}
|
|||
|
|
.warning {{ color: #856404; background-color: #fff3cd; padding: 5px; border-radius: 3px; }}
|
|||
|
|
</style>
|
|||
|
|
</head>
|
|||
|
|
<body>
|
|||
|
|
<h1>数据统计汇总报告 - {self.filename}</h1>
|
|||
|
|
<p>生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
|
|||
|
|
<p>输入文件位置: <code>{self.file_dir}</code></p>
|
|||
|
|
|
|||
|
|
<div class="info-box">
|
|||
|
|
<h3>报告说明</h3>
|
|||
|
|
<p>此报告为快速生成的汇总报告,包含所有分组的基本统计信息。</p>
|
|||
|
|
<p>CPK计算使用<strong>预设的上下限值</strong>,而不是实测的最小最大值。</p>
|
|||
|
|
<p class="warning">注意:分组详细报告可能需要较长时间生成,请勿关闭程序。</p>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="progress">
|
|||
|
|
<h3>处理进度</h3>
|
|||
|
|
<p>总分组数量: <strong>{total_groups}</strong></p>
|
|||
|
|
<p>有效Height CPK数量: <strong>{valid_height_cpk}</strong></p>
|
|||
|
|
<p>有效Volume CPK数量: <strong>{valid_volume_cpk}</strong></p>
|
|||
|
|
<p>有效Area CPK数量: <strong>{valid_area_cpk}</strong></p>
|
|||
|
|
<p>输出目录: <code>{self.output_dir}</code></p>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="nav-links">
|
|||
|
|
<a href="group_reports/index.html">查看分组报告索引</a>
|
|||
|
|
<a href="statistics.xlsx">下载Excel数据</a>
|
|||
|
|
<a href="progress.json">查看处理进度</a>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<h2>详细统计数据</h2>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th rowspan="2">分组标识<br>(PAD ID + Component ID)</th>
|
|||
|
|
<th colspan="8">Height(mil)</th>
|
|||
|
|
<th colspan="8">Volume(%)</th>
|
|||
|
|
<th colspan="8">Area(%)</th>
|
|||
|
|
{'<th colspan="3">CPK值</th>' if 'Height_Cpk' in self.stats.columns else ''}
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<!-- Height列标题 -->
|
|||
|
|
<th class="limits">预设下限<br>(LSL)</th>
|
|||
|
|
<th class="limits">预设上限<br>(USL)</th>
|
|||
|
|
<th class="measured">实测最小值</th>
|
|||
|
|
<th class="measured">实测最大值</th>
|
|||
|
|
<th>平均值</th>
|
|||
|
|
<th>标准差</th>
|
|||
|
|
<th>数据点数</th>
|
|||
|
|
<th>CPK</th>
|
|||
|
|
<!-- Volume列标题 -->
|
|||
|
|
<th class="limits">预设下限<br>(LSL)</th>
|
|||
|
|
<th class="limits">预设上限<br>(USL)</th>
|
|||
|
|
<th class="measured">实测最小值</th>
|
|||
|
|
<th class="measured">实测最大值</th>
|
|||
|
|
<th>平均值</th>
|
|||
|
|
<th>标准差</th>
|
|||
|
|
<th>数据点数</th>
|
|||
|
|
<th>CPK</th>
|
|||
|
|
<!-- Area列标题 -->
|
|||
|
|
<th class="limits">预设下限<br>(LSL)</th>
|
|||
|
|
<th class="limits">预设上限<br>(USL)</th>
|
|||
|
|
<th class="measured">实测最小值</th>
|
|||
|
|
<th class="measured">实测最大值</th>
|
|||
|
|
<th>平均值</th>
|
|||
|
|
<th>标准差</th>
|
|||
|
|
<th>数据点数</th>
|
|||
|
|
<th>CPK</th>
|
|||
|
|
<!-- 分组标识 -->
|
|||
|
|
<th>分组</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
# 生成表格行数据
|
|||
|
|
for group_key in stats_index:
|
|||
|
|
row = self.stats.loc[group_key]
|
|||
|
|
|
|||
|
|
def format_value(value):
|
|||
|
|
"""格式化数值显示"""
|
|||
|
|
if pd.isna(value):
|
|||
|
|
return 'N/A'
|
|||
|
|
elif isinstance(value, (int, float)):
|
|||
|
|
return f"{value:.4f}"
|
|||
|
|
else:
|
|||
|
|
return str(value)
|
|||
|
|
|
|||
|
|
# 获取数据点数
|
|||
|
|
group_data = self.data[self.data['Group_Key'] == group_key]
|
|||
|
|
data_count = len(group_data)
|
|||
|
|
|
|||
|
|
# 安全处理CPK列
|
|||
|
|
cpk_columns = {"height": "", "volume": "", "area": ""}
|
|||
|
|
if 'Height_Cpk' in self.stats.columns:
|
|||
|
|
cpk_columns = {
|
|||
|
|
"height": f"""<td>{format_value(row['Height_Cpk'])}</td>""",
|
|||
|
|
"volume": f"""<td>{format_value(row['Volume_Cpk'])}</td>""",
|
|||
|
|
"area": f"""<td>{format_value(row['Area_Cpk'])}</td>"""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# 为CPK值添加颜色标识
|
|||
|
|
def get_cpk_color(cpk_value):
|
|||
|
|
"""根据CPK值返回颜色标识"""
|
|||
|
|
if pd.isna(cpk_value):
|
|||
|
|
return ''
|
|||
|
|
try:
|
|||
|
|
cpk_val = float(cpk_value)
|
|||
|
|
if cpk_val >= 1.33:
|
|||
|
|
return 'style="background-color: #90EE90;"' # 绿色 - 优秀
|
|||
|
|
elif cpk_val >= 1.0:
|
|||
|
|
return 'style="background-color: #FFFFE0;"' # 黄色 - 合格
|
|||
|
|
else:
|
|||
|
|
return 'style="background-color: #FFB6C1;"' # 红色 - 不合格
|
|||
|
|
except:
|
|||
|
|
return ''
|
|||
|
|
|
|||
|
|
# 如果存在CPK列,添加颜色
|
|||
|
|
if 'Height_Cpk' in self.stats.columns:
|
|||
|
|
# 这里需要为每个CPK单元格单独设置颜色
|
|||
|
|
height_color = get_cpk_color(row['Height_Cpk'])
|
|||
|
|
volume_color = get_cpk_color(row['Volume_Cpk'])
|
|||
|
|
area_color = get_cpk_color(row['Area_Cpk'])
|
|||
|
|
|
|||
|
|
cpk_columns = {
|
|||
|
|
"height": f"""<td {height_color}>{format_value(row['Height_Cpk'])}</td>""",
|
|||
|
|
"volume": f"""<td {volume_color}>{format_value(row['Volume_Cpk'])}</td>""",
|
|||
|
|
"area": f"""<td {area_color}>{format_value(row['Area_Cpk'])}</td>"""
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
html_content += f"""
|
|||
|
|
<tr>
|
|||
|
|
<td><a href="group_reports/{self._sanitize_filename(group_key)}.html" target="_blank">{group_key}</a></td>
|
|||
|
|
<!-- Height数据 -->
|
|||
|
|
<td class="limits">{format_value(row['Height_Low(mil)'])}</td>
|
|||
|
|
<td class="limits">{format_value(row['Height_High(mil)'])}</td>
|
|||
|
|
<td class="measured">{format_value(row['Height_Measured_Min(mil)'])}</td>
|
|||
|
|
<td class="measured">{format_value(row['Height_Measured_Max(mil)'])}</td>
|
|||
|
|
<td>{format_value(row['Height_Mean(mil)'])}</td>
|
|||
|
|
<td>{format_value(row['Height_Std(mil)'])}</td>
|
|||
|
|
<td>{data_count}</td>
|
|||
|
|
{cpk_columns["height"]}
|
|||
|
|
<!-- Volume数据 -->
|
|||
|
|
<td class="limits">{format_value(row['Vol_Min(%)'])}</td>
|
|||
|
|
<td class="limits">{format_value(row['Vol_Max(%)'])}</td>
|
|||
|
|
<td class="measured">{format_value(row['Vol_Measured_Min(%)'])}</td>
|
|||
|
|
<td class="measured">{format_value(row['Vol_Measured_Max(%)'])}</td>
|
|||
|
|
<td>{format_value(row['Vol_Mean(%)'])}</td>
|
|||
|
|
<td>{format_value(row['Vol_Std(%)'])}</td>
|
|||
|
|
<td>{data_count}</td>
|
|||
|
|
{cpk_columns["volume"]}
|
|||
|
|
<!-- Area数据 -->
|
|||
|
|
<td class="limits">{format_value(row['Area_Min(%)'])}</td>
|
|||
|
|
<td class="limits">{format_value(row['Area_Max(%)'])}</td>
|
|||
|
|
<td class="measured">{format_value(row['Area_Measured_Min(%)'])}</td>
|
|||
|
|
<td class="measured">{format_value(row['Area_Measured_Max(%)'])}</td>
|
|||
|
|
<td>{format_value(row['Area_Mean(%)'])}</td>
|
|||
|
|
<td>{format_value(row['Area_Std(%)'])}</td>
|
|||
|
|
<td>{data_count}</td>
|
|||
|
|
{cpk_columns["area"]}
|
|||
|
|
<!-- 分组标识 -->
|
|||
|
|
<td>{group_key}</td>
|
|||
|
|
</tr>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
html_content += """
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
|
|||
|
|
<div class="info-box">
|
|||
|
|
<h3>表格说明</h3>
|
|||
|
|
<p><span class="limits" style="padding: 2px 5px;">绿色背景</span>: 预设的上下限值(用于CPK计算)</p>
|
|||
|
|
<p><span class="measured" style="padding: 2px 5px;">黄色背景</span>: 实测数据的最小最大值</p>
|
|||
|
|
<p>白色背景: 统计计算值</p>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<div class="info-box">
|
|||
|
|
<h3>CPK计算说明</h3>
|
|||
|
|
<p><strong>CPK计算公式:</strong> CPK = min[(USL - mean) / (3×std), (mean - LSL) / (3×std)]</p>
|
|||
|
|
<p><strong>上下限取值:</strong> 使用数据文件中的预设上下限值,而不是实测的最小最大值</p>
|
|||
|
|
<p><span style="background-color: #90EE90; padding: 2px 5px;">绿色</span> CPK ≥ 1.33 (过程能力优秀)</p>
|
|||
|
|
<p><span style="background-color: #FFFFE0; padding: 2px 5px;">黄色</span> 1.0 ≤ CPK < 1.33 (过程能力合格)</p>
|
|||
|
|
<p><span style="background-color: #FFB6C1; padding: 2px 5px;">红色</span> CPK < 1.0 (过程能力不足)</p>
|
|||
|
|
</div>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
report_path = os.path.join(self.output_dir, 'summary_report.html')
|
|||
|
|
with open(report_path, 'w', encoding='utf-8') as f:
|
|||
|
|
f.write(html_content)
|
|||
|
|
|
|||
|
|
print(f"✅ 汇总报告已生成: {report_path}")
|
|||
|
|
return report_path
|
|||
|
|
|
|||
|
|
def _create_empty_report(self):
|
|||
|
|
"""创建空数据报告"""
|
|||
|
|
html_content = f"""
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html>
|
|||
|
|
<head>
|
|||
|
|
<title>数据统计报告 - {self.filename}</title>
|
|||
|
|
<style>
|
|||
|
|
body {{ font-family: Arial, sans-serif; margin: 20px; }}
|
|||
|
|
.warning {{ color: #856404; background-color: #fff3cd; padding: 20px; border-radius: 5px; }}
|
|||
|
|
</style>
|
|||
|
|
</head>
|
|||
|
|
<body>
|
|||
|
|
<h1>数据统计报告 - {self.filename}</h1>
|
|||
|
|
<div class="warning">
|
|||
|
|
<h2>⚠️ 数据为空</h2>
|
|||
|
|
<p>未找到有效数据或统计数据为空。</p>
|
|||
|
|
<p>生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
|
|||
|
|
<p>输入文件位置: <code>{self.file_dir}</code></p>
|
|||
|
|
</div>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
report_path = os.path.join(self.output_dir, 'summary_report.html')
|
|||
|
|
with open(report_path, 'w', encoding='utf-8') as f:
|
|||
|
|
f.write(html_content)
|
|||
|
|
|
|||
|
|
return report_path
|
|||
|
|
|
|||
|
|
def _sanitize_filename(self, filename):
|
|||
|
|
"""清理文件名,移除非法字符"""
|
|||
|
|
import re
|
|||
|
|
return re.sub(r'[<>:"/\\|?*]', '_', filename)
|
|||
|
|
|
|||
|
|
def _generate_group_reports_incremental(self):
|
|||
|
|
"""分批生成分组报告,避免长时间等待"""
|
|||
|
|
# 使用明确的空值检查方法
|
|||
|
|
if self.stats is None or len(self.stats.index) == 0:
|
|||
|
|
print("⚠️ 统计数据为空,跳过分组报告生成")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
stats_index = list(self.stats.index)
|
|||
|
|
total_groups = len(stats_index)
|
|||
|
|
|
|||
|
|
if total_groups == 0:
|
|||
|
|
print("⚠️ 没有有效的分组数据")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
print(f"📊 开始分批生成 {total_groups} 个分组报告...")
|
|||
|
|
print(f"📁 分组报告将保存到: {os.path.join(self.output_dir, 'group_reports')}")
|
|||
|
|
|
|||
|
|
# 分批处理
|
|||
|
|
BATCH_SIZE = min(20, total_groups)
|
|||
|
|
completed_groups = []
|
|||
|
|
total_batches = (total_groups + BATCH_SIZE - 1) // BATCH_SIZE
|
|||
|
|
|
|||
|
|
for batch_idx in range(total_batches):
|
|||
|
|
batch_start = batch_idx * BATCH_SIZE
|
|||
|
|
batch_end = min((batch_idx + 1) * BATCH_SIZE, total_groups)
|
|||
|
|
batch_groups = stats_index[batch_start:batch_end]
|
|||
|
|
|
|||
|
|
print(f"\n🔄 处理批次 {batch_idx + 1}/{total_batches}: 分组 {batch_start + 1}-{batch_end}")
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
batch_results = self._process_batch(batch_groups)
|
|||
|
|
|
|||
|
|
# 生成当前批次的分组报告
|
|||
|
|
successful_reports = 0
|
|||
|
|
for group_key in batch_groups:
|
|||
|
|
try:
|
|||
|
|
self._create_single_group_report(group_key, batch_results.get(group_key, {}))
|
|||
|
|
completed_groups.append(group_key)
|
|||
|
|
successful_reports += 1
|
|||
|
|
print(f" ✅ 分组报告生成: {self._sanitize_filename(group_key)}.html")
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f" ❌ 生成分组 {group_key} 报告失败: {e}")
|
|||
|
|
print(f" 错误详情: {traceback.format_exc()}")
|
|||
|
|
|
|||
|
|
# 保存进度
|
|||
|
|
self._save_progress(completed_groups, f"batch_{batch_idx + 1}")
|
|||
|
|
|
|||
|
|
print(f"✅ 批次 {batch_idx + 1} 完成 (成功生成 {successful_reports}/{len(batch_groups)} 个报告)")
|
|||
|
|
|
|||
|
|
except Exception as batch_error:
|
|||
|
|
print(f"❌ 批次 {batch_idx + 1} 处理失败: {batch_error}")
|
|||
|
|
print(f" 错误详情: {traceback.format_exc()}")
|
|||
|
|
# 继续处理下一批次
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
# 添加批次间隔,避免资源竞争
|
|||
|
|
if batch_idx < total_batches - 1:
|
|||
|
|
print("⏳ 等待2秒后处理下一批次...")
|
|||
|
|
time.sleep(2)
|
|||
|
|
|
|||
|
|
print(f"✅ 所有分组报告生成完成 (总计: {len(completed_groups)}/{total_groups})")
|
|||
|
|
print(f"📁 分组报告保存位置: {os.path.join(self.output_dir, 'group_reports')}")
|
|||
|
|
|
|||
|
|
def _process_batch(self, group_keys):
|
|||
|
|
"""处理单个批次的分组"""
|
|||
|
|
if not group_keys: # 明确的空列表检查
|
|||
|
|
print("⚠️ 当前批次没有分组数据")
|
|||
|
|
return {}
|
|||
|
|
|
|||
|
|
tasks = []
|
|||
|
|
for group_key in group_keys:
|
|||
|
|
# 问题修正:使用明确的检查方法
|
|||
|
|
stats_index_list = list(self.stats.index) # 转换为列表
|
|||
|
|
if group_key not in stats_index_list:
|
|||
|
|
print(f"⚠️ 警告: 分组 {group_key} 不在统计数据中,跳过")
|
|||
|
|
continue # 跳过不存在的分组
|
|||
|
|
|
|||
|
|
# 问题修正:避免DataFrame的布尔判断,使用明确的.empty检查
|
|||
|
|
group_data = self.data[self.data['Group_Key'] == group_key]
|
|||
|
|
if group_data.empty: # 明确的空值检查
|
|||
|
|
print(f"⚠️ 警告: 分组 {group_key} 的数据为空,跳过")
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
row = self.stats.loc[group_key]
|
|||
|
|
|
|||
|
|
# 安全地获取特征数据,添加空值检查
|
|||
|
|
feature_data_dict = {}
|
|||
|
|
for col in ['Height(mil)', 'Volume(%)', 'Area(%)']:
|
|||
|
|
col_data = group_data[col].dropna()
|
|||
|
|
if len(col_data) == 0:
|
|||
|
|
print(f"⚠️ 警告: 分组 {group_key} 的 {col} 数据为空")
|
|||
|
|
col_data = pd.Series([], dtype=float) # 创建空Series
|
|||
|
|
feature_data_dict[col] = col_data
|
|||
|
|
|
|||
|
|
# 获取预设的上下限值
|
|||
|
|
limits_dict = {}
|
|||
|
|
# 安全地获取限制值
|
|||
|
|
try:
|
|||
|
|
limits_dict = {
|
|||
|
|
'Height(mil)': (row['Height_High(mil)'], row['Height_Low(mil)']), # USL, LSL
|
|||
|
|
'Volume(%)': (row['Vol_Max(%)'], row['Vol_Min(%)']), # USL, LSL
|
|||
|
|
'Area(%)': (row['Area_Max(%)'], row['Area_Min(%)']) # USL, LSL
|
|||
|
|
}
|
|||
|
|
except KeyError as e:
|
|||
|
|
print(f"❌ 错误: 分组 {group_key} 缺少预设上下限列 {e}")
|
|||
|
|
continue
|
|||
|
|
|
|||
|
|
tasks.append((group_key, feature_data_dict, limits_dict))
|
|||
|
|
|
|||
|
|
if len(tasks) == 0: # 明确的空列表检查
|
|||
|
|
print("⚠️ 当前批次没有有效任务")
|
|||
|
|
return {}
|
|||
|
|
|
|||
|
|
# 使用多进程处理
|
|||
|
|
max_workers = min(mp.cpu_count(), len(tasks), 4)
|
|||
|
|
results = {}
|
|||
|
|
|
|||
|
|
print(f"🔧 开始处理批次中的 {len(tasks)} 个任务,使用 {max_workers} 个进程...")
|
|||
|
|
|
|||
|
|
with ProcessPoolExecutor(max_workers=max_workers) as executor:
|
|||
|
|
future_to_key = {}
|
|||
|
|
for task in tasks:
|
|||
|
|
future = executor.submit(plot_worker, task)
|
|||
|
|
future_to_key[future] = task[0]
|
|||
|
|
|
|||
|
|
completed_count = 0
|
|||
|
|
for future in as_completed(future_to_key):
|
|||
|
|
group_key = future_to_key[future]
|
|||
|
|
try:
|
|||
|
|
result_key, result_data = future.result()
|
|||
|
|
if result_key: # 明确的结果检查
|
|||
|
|
results[result_key] = result_data
|
|||
|
|
completed_count += 1
|
|||
|
|
print(f" 📈 图表生成完成: {result_key} ({completed_count}/{len(tasks)})")
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f" ❌ 处理分组 {group_key} 时出错: {e}")
|
|||
|
|
|
|||
|
|
print(f"✅ 批次处理完成,成功生成 {len(results)}/{len(tasks)} 个图表")
|
|||
|
|
return results
|
|||
|
|
|
|||
|
|
def _create_single_group_report(self, group_key, feature_charts):
|
|||
|
|
"""创建单个分组的独立报告"""
|
|||
|
|
# 添加明确的分组存在性检查
|
|||
|
|
stats_index_list = list(self.stats.index) # 转换为列表
|
|||
|
|
|
|||
|
|
if group_key not in stats_index_list:
|
|||
|
|
print(f"⚠️ 警告: 分组 {group_key} 不在统计数据中,跳过报告生成")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
row = self.stats.loc[group_key]
|
|||
|
|
except KeyError:
|
|||
|
|
print(f"❌ 错误: 无法获取分组 {group_key} 的统计数据")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
# 明确的空值检查
|
|||
|
|
group_data = self.data[self.data['Group_Key'] == group_key]
|
|||
|
|
|
|||
|
|
# 确保group_data不为空
|
|||
|
|
if group_data.empty:
|
|||
|
|
print(f"⚠️ 警告: 分组 {group_key} 的数据为空,跳过报告生成")
|
|||
|
|
return
|
|||
|
|
|
|||
|
|
# 安全格式化数值
|
|||
|
|
def safe_format(value, default="N/A"):
|
|||
|
|
try:
|
|||
|
|
if pd.isna(value):
|
|||
|
|
return default
|
|||
|
|
return f"{float(value):.4f}"
|
|||
|
|
except (ValueError, TypeError):
|
|||
|
|
return default
|
|||
|
|
|
|||
|
|
html_content = f"""
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html>
|
|||
|
|
<head>
|
|||
|
|
<title>{group_key} - 详细分析报告</title>
|
|||
|
|
<style>
|
|||
|
|
body {{ font-family: Arial, sans-serif; margin: 20px; }}
|
|||
|
|
h1, h2, h3 {{ color: #333; }}
|
|||
|
|
.summary {{ background-color: #f5f5f5; padding: 15px; border-radius: 5px; margin: 15px 0; }}
|
|||
|
|
table {{ border-collapse: collapse; width: 100%; margin: 10px 0; }}
|
|||
|
|
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: center; }}
|
|||
|
|
th {{ background-color: #4CAF50; color: white; }}
|
|||
|
|
.limits {{ background-color: #e8f5e8; font-weight: bold; }}
|
|||
|
|
.measured {{ background-color: #fff3cd; }}
|
|||
|
|
.chart-container {{ display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin: 15px 0; }}
|
|||
|
|
.chart {{ text-align: center; background-color: #fafafa; padding: 10px; border-radius: 5px; }}
|
|||
|
|
.chart img {{ max-width: 100%; height: auto; }}
|
|||
|
|
.nav {{ margin: 10px 0; }}
|
|||
|
|
.nav a {{ margin-right: 10px; }}
|
|||
|
|
</style>
|
|||
|
|
</head>
|
|||
|
|
<body>
|
|||
|
|
<div class="nav">
|
|||
|
|
<a href="../summary_report.html">返回汇总报告</a>
|
|||
|
|
<a href="index.html">返回索引</a>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<h1>{group_key} - 详细分析报告</h1>
|
|||
|
|
<p>生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
|
|||
|
|
<p>输入文件位置: <code>{self.file_dir}</code></p>
|
|||
|
|
|
|||
|
|
<div class="summary">
|
|||
|
|
<h2>基本统计信息</h2>
|
|||
|
|
<table>
|
|||
|
|
<tr>
|
|||
|
|
<th>特征</th><th class="limits">预设下限(LSL)</th><th class="limits">预设上限(USL)</th>
|
|||
|
|
<th class="measured">实测最小值</th><th class="measured">实测最大值</th>
|
|||
|
|
<th>平均值</th><th>标准差</th><th>CPK</th>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Height(mil)</td><td class="limits">{safe_format(row.get('Height_Low(mil)'))}</td><td class="limits">{safe_format(row.get('Height_High(mil)'))}</td>
|
|||
|
|
<td class="measured">{safe_format(row.get('Height_Measured_Min(mil)'))}</td><td class="measured">{safe_format(row.get('Height_Measured_Max(mil)'))}</td>
|
|||
|
|
<td>{safe_format(row.get('Height_Mean(mil)'))}</td><td>{safe_format(row.get('Height_Std(mil)'))}</td><td>{safe_format(row.get('Height_Cpk'))}</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Volume(%)</td><td class="limits">{safe_format(row.get('Vol_Min(%)'))}</td><td class="limits">{safe_format(row.get('Vol_Max(%)'))}</td>
|
|||
|
|
<td class="measured">{safe_format(row.get('Vol_Measured_Min(%)'))}</td><td class="measured">{safe_format(row.get('Vol_Measured_Max(%)'))}</td>
|
|||
|
|
<td>{safe_format(row.get('Vol_Mean(%)'))}</td><td>{safe_format(row.get('Vol_Std(%)'))}</td><td>{safe_format(row.get('Volume_Cpk'))}</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Area(%)</td><td class="limits">{safe_format(row.get('Area_Min(%)'))}</td><td class="limits">{safe_format(row.get('Area_Max(%)'))}</td>
|
|||
|
|
<td class="measured">{safe_format(row.get('Area_Measured_Min(%)'))}</td><td class="measured">{safe_format(row.get('Area_Measured_Max(%)'))}</td>
|
|||
|
|
<td>{safe_format(row.get('Area_Mean(%)'))}</td><td>{safe_format(row.get('Area_Std(%)'))}</td><td>{safe_format(row.get('Area_Cpk'))}</td>
|
|||
|
|
</tr>
|
|||
|
|
</table>
|
|||
|
|
</div>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
# 添加图表
|
|||
|
|
for feature_name in ['Height(mil)', 'Volume(%)', 'Area(%)']:
|
|||
|
|
chart_base64 = feature_charts.get(feature_name, "")
|
|||
|
|
if chart_base64 and len(chart_base64) > 0: # 明确的字符串检查
|
|||
|
|
html_content += f"""
|
|||
|
|
<h2>{feature_name} 分析图表</h2>
|
|||
|
|
<div class="chart-container">
|
|||
|
|
<div class="chart">
|
|||
|
|
<img src="data:image/png;base64,{chart_base64}" alt="{feature_name}统计图表">
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
"""
|
|||
|
|
else:
|
|||
|
|
html_content += f"""
|
|||
|
|
<h2>{feature_name} 分析图表</h2>
|
|||
|
|
<p>该特征的图表生成失败或数据不足。</p>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
html_content += """
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
filename = self._sanitize_filename(group_key) + '.html'
|
|||
|
|
group_reports_dir = os.path.join(self.output_dir, 'group_reports')
|
|||
|
|
report_path = os.path.join(group_reports_dir, filename)
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
with open(report_path, 'w', encoding='utf-8') as f:
|
|||
|
|
f.write(html_content)
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"❌ 保存分组报告失败 {filename}: {e}")
|
|||
|
|
|
|||
|
|
def _create_report_index(self):
|
|||
|
|
"""创建分组报告索引"""
|
|||
|
|
# 确保使用正确的索引获取方式
|
|||
|
|
if self.stats is None or len(self.stats.index) == 0:
|
|||
|
|
print("⚠️ 统计数据为空,创建空索引")
|
|||
|
|
return self._create_empty_index()
|
|||
|
|
|
|||
|
|
stats_index = list(self.stats.index) # 转换为列表
|
|||
|
|
|
|||
|
|
html_content = """
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html>
|
|||
|
|
<head>
|
|||
|
|
<title>分组报告索引</title>
|
|||
|
|
<style>
|
|||
|
|
body { font-family: Arial, sans-serif; margin: 20px; }
|
|||
|
|
h1 { color: #333; }
|
|||
|
|
.group-list { margin: 20px 0; }
|
|||
|
|
.group-item { margin: 5px 0; }
|
|||
|
|
.group-item a { text-decoration: none; color: #0066cc; }
|
|||
|
|
.nav { margin: 15px 0; }
|
|||
|
|
</style>
|
|||
|
|
</head>
|
|||
|
|
<body>
|
|||
|
|
<div class="nav">
|
|||
|
|
<a href="../summary_report.html">返回汇总报告</a>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
<h1>分组报告索引</h1>
|
|||
|
|
<p>共生成 """ + str(len(stats_index)) + """ 个分组报告</p>
|
|||
|
|
<p>输入文件位置: <code>""" + self.file_dir + """</code></p>
|
|||
|
|
|
|||
|
|
<div class="group-list">
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
for group_key in stats_index: # 使用列表而不是DataFrame索引
|
|||
|
|
filename = self._sanitize_filename(group_key) + '.html'
|
|||
|
|
html_content += f'<div class="group-item"><a href="{filename}">{group_key}</a></div>\n'
|
|||
|
|
|
|||
|
|
html_content += """
|
|||
|
|
</div>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
index_path = os.path.join(self.output_dir, 'group_reports', 'index.html')
|
|||
|
|
try:
|
|||
|
|
with open(index_path, 'w', encoding='utf-8') as f:
|
|||
|
|
f.write(html_content)
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f"❌ 创建索引文件失败: {e}")
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
return index_path
|
|||
|
|
|
|||
|
|
def _create_empty_index(self):
|
|||
|
|
"""创建空索引文件"""
|
|||
|
|
html_content = """
|
|||
|
|
<!DOCTYPE html>
|
|||
|
|
<html>
|
|||
|
|
<head>
|
|||
|
|
<title>分组报告索引</title>
|
|||
|
|
<style>
|
|||
|
|
body { font-family: Arial, sans-serif; margin: 20px; }
|
|||
|
|
.warning { color: #856404; background-color: #fff3cd; padding: 20px; }
|
|||
|
|
</style>
|
|||
|
|
</head>
|
|||
|
|
<body>
|
|||
|
|
<h1>分组报告索引</h1>
|
|||
|
|
<div class="warning">
|
|||
|
|
<h2>⚠️ 没有分组报告</h2>
|
|||
|
|
<p>当前没有生成任何分组报告。</p>
|
|||
|
|
<p>输入文件位置: <code>""" + self.file_dir + """</code></p>
|
|||
|
|
</div>
|
|||
|
|
</body>
|
|||
|
|
</html>
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
index_path = os.path.join(self.output_dir, 'group_reports', 'index.html')
|
|||
|
|
with open(index_path, 'w', encoding='utf-8') as f:
|
|||
|
|
f.write(html_content)
|
|||
|
|
|
|||
|
|
return index_path
|
|||
|
|
|
|||
|
|
def _calculate_cpk(self, stats):
|
|||
|
|
"""计算CPK值 - 使用预设的上下限值"""
|
|||
|
|
print("详细计算CPK值...")
|
|||
|
|
|
|||
|
|
def calculate_single_cpk(mean, std, usl, lsl):
|
|||
|
|
"""计算单个特征的CPK"""
|
|||
|
|
if std == 0 or pd.isna(std):
|
|||
|
|
return np.nan
|
|||
|
|
|
|||
|
|
if pd.isna(usl) or pd.isna(lsl):
|
|||
|
|
return np.nan
|
|||
|
|
|
|||
|
|
# CPK = min[(USL - mean) / (3*std), (mean - LSL) / (3*std)]
|
|||
|
|
cpu = (usl - mean) / (3 * std) if usl != float('inf') else float('inf')
|
|||
|
|
cpl = (mean - lsl) / (3 * std) if lsl != float('-inf') else float('inf')
|
|||
|
|
|
|||
|
|
# 如果其中一个限值为无穷大,则返回另一个值
|
|||
|
|
if cpu == float('inf') and cpl == float('inf'):
|
|||
|
|
return np.nan
|
|||
|
|
elif cpu == float('inf'):
|
|||
|
|
return cpl
|
|||
|
|
elif cpl == float('inf'):
|
|||
|
|
return cpu
|
|||
|
|
else:
|
|||
|
|
return min(cpu, cpl)
|
|||
|
|
|
|||
|
|
# 确保CPK列不存在时创建
|
|||
|
|
cpk_results = []
|
|||
|
|
|
|||
|
|
for idx, row in stats.iterrows():
|
|||
|
|
print(f"计算分组 {idx} 的CPK值...")
|
|||
|
|
|
|||
|
|
# Height CPK - 使用预设的Height_High作为USL,Height_Low作为LSL
|
|||
|
|
height_cpk = calculate_single_cpk(
|
|||
|
|
row['Height_Mean(mil)'],
|
|||
|
|
row['Height_Std(mil)'],
|
|||
|
|
row['Height_High(mil)'], # USL - 预设上限
|
|||
|
|
row['Height_Low(mil)'] # LSL - 预设下限
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Volume CPK - 使用预设的Vol_Max作为USL,Vol_Min作为LSL
|
|||
|
|
volume_cpk = calculate_single_cpk(
|
|||
|
|
row['Vol_Mean(%)'],
|
|||
|
|
row['Vol_Std(%)'],
|
|||
|
|
row['Vol_Max(%)'], # USL - 预设上限
|
|||
|
|
row['Vol_Min(%)'] # LSL - 预设下限
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Area CPK - 使用预设的Area_Max作为USL,Area_Min作为LSL
|
|||
|
|
area_cpk = calculate_single_cpk(
|
|||
|
|
row['Area_Mean(%)'],
|
|||
|
|
row['Area_Std(%)'],
|
|||
|
|
row['Area_Max(%)'], # USL - 预设上限
|
|||
|
|
row['Area_Min(%)'] # LSL - 预设下限
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
cpk_results.append({
|
|||
|
|
'Height_Cpk': round(height_cpk, 4) if not pd.isna(height_cpk) else np.nan,
|
|||
|
|
'Volume_Cpk': round(volume_cpk, 4) if not pd.isna(volume_cpk) else np.nan,
|
|||
|
|
'Area_Cpk': round(area_cpk, 4) if not pd.isna(area_cpk) else np.nan
|
|||
|
|
})
|
|||
|
|
|
|||
|
|
# 将CPK结果添加到统计数据中
|
|||
|
|
cpk_df = pd.DataFrame(cpk_results, index=stats.index)
|
|||
|
|
stats = pd.concat([stats, cpk_df], axis=1)
|
|||
|
|
|
|||
|
|
print("✅ 所有分组CPK计算完成 - 使用预设上下限值")
|
|||
|
|
return stats
|
|||
|
|
|
|||
|
|
def _save_to_excel_advanced(self):
    """Write ``statistics.xlsx`` into ``self.output_dir``.

    The workbook gets one summary sheet built from ``self.stats`` (when it is
    not ``None``) and one sheet per group for the first 50 distinct values of
    ``self.data['Group_Key']``.

    Sheet names are derived from the group key, with characters that are not
    allowed in worksheet titles replaced by ``_``, truncated to 31
    characters, and given a numeric suffix when two keys would otherwise map
    to the same name.

    Returns:
        The path of the written workbook, or ``None`` if saving failed (the
        error and traceback are printed, not raised).
    """
    print("保存Excel文件...")

    excel_filename = os.path.join(self.output_dir, 'statistics.xlsx')
    max_groups_to_save = 50
    max_sheet_name_len = 31
    summary_sheet = '统计汇总'
    # These characters are rejected in worksheet titles; one bad group key
    # would otherwise abort the whole export.
    invalid_title_chars = str.maketrans({ch: '_' for ch in '\\/*?:[]'})

    try:
        with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
            # Sheet names are compared case-insensitively, hence casefold().
            used_names = {summary_sheet.casefold()}

            if self.stats is not None:
                self.stats.reset_index().to_excel(
                    writer, sheet_name=summary_sheet, index=False
                )

            group_keys = self.data['Group_Key'].unique()[:max_groups_to_save]
            for group_key in group_keys:
                group_data = self.data[self.data['Group_Key'] == group_key]

                base_name = f"组_{group_key}".translate(invalid_title_chars)
                base_name = base_name[:max_sheet_name_len]

                # Long keys sharing a prefix collide after truncation; writing
                # both to one sheet would silently mix their rows.
                sheet_name = base_name
                suffix = 1
                while sheet_name.casefold() in used_names:
                    tag = f"_{suffix}"
                    sheet_name = base_name[:max_sheet_name_len - len(tag)] + tag
                    suffix += 1
                used_names.add(sheet_name.casefold())

                group_data.to_excel(writer, sheet_name=sheet_name, index=False)

        print(f"✅ Excel文件保存完成: {excel_filename}")
        return excel_filename

    except Exception as e:
        # Best-effort export: report the failure and let the caller continue.
        print(f"❌ Excel文件保存失败: {e}")
        print(f"   错误详情: {traceback.format_exc()}")
        return None
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Run the interactive report workflow from file selection to summary.

    Creates a ``DataProcessor``, asks it to select an input file, loads the
    data, generates the report and prints the resulting locations. Any
    exception is caught and printed together with its traceback.
    """
    print("=== 数据统计报告生成程序(使用预设上下限值) ===")

    processor = DataProcessor()

    try:
        # Guard clause: nothing to do when no file was selected.
        if not processor.select_file():
            print("❌ 未选择文件,程序退出")
            return

        processor._load_data()
        report_path = processor.generate_report()

        print("✅ 报告生成完成")
        print(f"📁 输入文件目录: {processor.file_dir}")
        print(f"📁 输出目录: {processor.output_dir}")
        print(f"📊 汇总报告: {report_path}")

        # Point the user at the workbook written next to the report.
        excel_path = os.path.join(processor.output_dir, 'statistics.xlsx')
        print(f"📊 Excel文件: {excel_path}")

    except Exception as e:
        print(f"❌ 程序执行失败: {e}")
        print(f"   错误详情: {traceback.format_exc()}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Force the 'spawn' start method before any worker process is created,
    # overriding a start method that may already have been set.
    mp.set_start_method(method='spawn', force=True)
    main()
|