# Source path: PythonApp/htmlProcess/htmlReportProcess_Merge_picHtml_V1.py
# (Repository-viewer metadata: 927 lines, 35 KiB, Python.)
#
# NOTE: the repository viewer flagged this file as containing ambiguous Unicode
# characters, i.e. characters that might be confused with other characters.
import os
import re
import sys
import time
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from matplotlib.lines import Line2D
from typing import Optional, Tuple, List, Dict, Any, Union
from pathlib import Path
import numpy as np
import base64
from io import BytesIO
from jinja2 import Template
from colorama import Fore, Style, init
# Module-wide library configuration (executed once at import time).
# Silence pandas' SettingWithCopy warnings so they do not clutter console output.
pd.options.mode.chained_assignment = None
# Font fallback list that includes fonts able to render Chinese text in plots.
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans', 'Arial Unicode MS', 'Microsoft YaHei']
# Draw negative numbers with an ASCII hyphen instead of the Unicode minus sign
# (presumably because some of the CJK fonts above lack that glyph — TODO confirm).
plt.rcParams['axes.unicode_minus'] = False
# Jinja2 HTML template for the report; includes the per-SN plot section.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>测试报告分析 - {{ keyword }}</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
background-color: #f5f5f5;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.test-card {
background: white;
border-radius: 10px;
padding: 20px;
margin-bottom: 20px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
transition: transform 0.2s ease;
}
.test-card:hover {
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
}
.test-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 15px;
padding-bottom: 10px;
border-bottom: 2px solid #eaeaea;
}
.test-title {
font-size: 18px;
font-weight: bold;
color: #333;
}
.test-stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
margin-bottom: 15px;
}
.stat-item {
background: #f8f9fa;
padding: 12px;
border-radius: 8px;
text-align: center;
}
.stat-label {
font-size: 12px;
color: #666;
margin-bottom: 5px;
}
.stat-value {
font-size: 16px;
font-weight: bold;
color: #333;
}
.plot-container {
text-align: center;
margin: 20px 0;
}
.plot-image {
max-width: 100%;
height: auto;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
.sn-plots-container {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 20px;
margin: 20px 0;
}
.sn-plot-item {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
text-align: center;
}
.sn-plot-title {
font-size: 14px;
font-weight: bold;
margin-bottom: 10px;
color: #555;
}
.summary {
background: white;
border-radius: 10px;
padding: 20px;
margin-top: 20px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
.summary-item {
margin: 10px 0;
padding: 10px;
background: #f8f9fa;
border-radius: 6px;
}
.timestamp {
text-align: center;
color: #666;
margin-top: 30px;
font-size: 12px;
}
.badge {
padding: 4px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: bold;
}
.badge-success {
background: #d4edda;
color: #155724;
}
.badge-warning {
background: #fff3cd;
color: #856404;
}
.badge-danger {
background: #f8d7da;
color: #721c24;
}
.section-title {
font-size: 16px;
font-weight: bold;
margin: 20px 0 10px 0;
color: #333;
border-left: 4px solid #667eea;
padding-left: 10px;
}
</style>
</head>
<body>
<div class="header">
<h1>📊 测试报告分析</h1>
<p>关键词: <strong>{{ keyword }}</strong> | 生成时间: {{ timestamp }}</p>
<p>共分析 {{ test_count }} 个测试项,{{ total_points }} 个数据点</p>
</div>
{% for test in tests %}
<div class="test-card">
<div class="test-header">
<div class="test-title">📋 {{ test.name }}</div>
<div class="badge badge-{{ test.status }}">
{{ test.status_display }}
</div>
</div>
<div class="test-stats">
<div class="stat-item">
<div class="stat-label">数据点数</div>
<div class="stat-value">{{ test.stats.count }}</div>
</div>
<div class="stat-item">
<div class="stat-label">平均值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.mean) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">中位数</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.median) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">标准差</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.std) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">最小值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.min) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">最大值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.max) }}</div>
</div>
</div>
{% if test.limits.lower is not none or test.limits.upper is not none %}
<div class="test-stats">
{% if test.limits.lower is not none %}
<div class="stat-item">
<div class="stat-label">下限值</div>
<div class="stat-value">{{ "%.4f"|format(test.limits.lower) }}</div>
</div>
{% endif %}
{% if test.limits.upper is not none %}
<div class="stat-item">
<div class="stat-label">上限值</div>
<div class="stat-value">{{ "%.4f"|format(test.limits.upper) }}</div>
</div>
{% endif %}
</div>
{% endif %}
<!-- 汇总图 -->
<div class="section-title">📈 汇总视图 (所有SN)</div>
<div class="plot-container">
<img src="data:image/png;base64,{{ test.summary_plot_image }}" alt="{{ test.name }} 汇总散点图" class="plot-image">
</div>
<!-- SN独立图 -->
{% if test.sn_plot_images %}
<div class="section-title">🔍 SN独立视图 ({{ test.sn_plot_images|length }}个SN)</div>
<div class="sn-plots-container">
{% for sn_plot in test.sn_plot_images %}
<div class="sn-plot-item">
<div class="sn-plot-title">SN: {{ sn_plot.sn }}</div>
<img src="data:image/png;base64,{{ sn_plot.image }}" alt="{{ test.name }} - SN {{ sn_plot.sn }} 散点图" class="plot-image">
</div>
{% endfor %}
</div>
{% endif %}
</div>
{% endfor %}
<div class="summary">
<h3>📈 分析摘要</h3>
<div class="summary-item">
<strong>文件路径:</strong> {{ file_path }}
</div>
<div class="summary-item">
<strong>分析时间:</strong> {{ analysis_time }}秒
</div>
<div class="summary-item">
<strong>测试项分布:</strong>
<ul>
<li>正常: {{ status_counts.normal }} 个</li>
<li>警告: {{ status_counts.warning }} 个</li>
<li>异常: {{ status_counts.abnormal }} 个</li>
</ul>
</div>
</div>
<div class="timestamp">
报告生成于 {{ timestamp }} | 测试报告分析系统
</div>
</body>
</html>
"""
class TestReportScatterPlotter:
    """Interactive tool that turns an Excel test report into an HTML scatter report.

    Workflow (see :meth:`run`): ask for the workbook path, load the
    "Merged All Tests" / "All Tests" worksheet, then repeatedly ask for a
    keyword that filters the ``Test Name New`` column and write one
    self-contained HTML report (plots embedded as base64 PNGs) per keyword.
    """

    # The class name starts with "Test"; tell pytest it is not a test case.
    __test__ = False

    def __init__(self):
        """Initialise an empty plotter; nothing is read until :meth:`run`."""
        self.file_path: Optional[str] = None
        self.df: Optional[pd.DataFrame] = None
        self.output_dir: Optional[str] = None
        # Columns that must be present in the selected worksheet.
        self.required_columns = [
            "Test Name New", "SN", "Measurement", "Test Time", "Lower Limit", "Upper Limit",
        ]
        # Actual column names of the limit columns, resolved in load_data().
        self.col_lower: Optional[str] = None
        self.col_upper: Optional[str] = None
        self.html_report_path: Optional[str] = None
        # Cache for already processed data (currently not read by any method).
        self._processed_data_cache: Dict[str, Any] = {}

    def _print_stage(self, msg: str) -> None:
        """Print ``msg`` framed by separator lines to mark a new stage."""
        print(f"\n{'=' * 30}\n{msg}\n{'=' * 30}")

    def _print_progress(self, current: int, total: int, prefix: str = "进度") -> None:
        """Redraw a single-line text progress bar on stdout.

        Args:
            current: Number of finished items.
            total: Total number of items; nothing is printed when ``total <= 0``.
            prefix: Label printed in front of the bar.
        """
        if total <= 0:
            return
        bar_len = 30
        percent = (current / total) * 100
        # Clamp so an out-of-range ``current`` can never produce a bar that is
        # shorter or longer than ``bar_len`` characters.
        filled = max(0, min(bar_len, int(bar_len * current / total)))
        # The fill character must be non-empty, otherwise the bar only shows
        # the remaining dashes and changes width on every update.
        bar = "#" * filled + "-" * (bar_len - filled)
        sys.stdout.write(f"\r{prefix}: [{bar}] {current}/{total} ({percent:.1f}%)")
        sys.stdout.flush()
        if current == total:
            print()  # finish the line once the work is complete

    def get_file_path(self) -> None:
        """Prompt until the user enters the path of an existing Excel file.

        Surrounding quotes (as produced by drag-and-drop into a terminal) are
        removed and ``~`` is expanded. The resolved absolute path is stored in
        ``self.file_path``.
        """
        self._print_stage("输入文件路径")
        while True:
            # Reset the colour right away so later output is not tinted.
            print(f"{Fore.WHITE}请输入测试报告文件路径(.xlsx): {Style.RESET_ALL}")
            raw_path = input("> ").strip().strip('"\'').strip()
            if not raw_path:
                continue
            path_obj = Path(raw_path).expanduser()
            if not path_obj.exists():
                print(f"文件不存在: {raw_path},请重新输入")
                continue
            # Reject directories and non-Excel files here instead of letting
            # load_data() abort the whole program later.
            if not path_obj.is_file() or path_obj.suffix.lower() not in (".xlsx", ".xls"):
                print(f"不是有效的 Excel 文件(.xls/.xlsx): {raw_path},请重新输入")
                continue
            self.file_path = str(path_obj.resolve())
            print(f"已选择文件: {self.file_path}")
            break

    def _find_column_case_insensitive(self, candidates: List[str]) -> Optional[str]:
        """Return the first DataFrame column matching one of ``candidates``.

        Matching ignores case and surrounding whitespace. Returns ``None``
        when no data is loaded or nothing matches.
        """
        if self.df is None:
            return None
        # str() keeps the lookup working for non-string column labels.
        columns_lower = {str(col).lower().strip(): col for col in self.df.columns}
        for candidate in candidates:
            key = candidate.lower().strip()
            if key in columns_lower:
                return columns_lower[key]
        return None

    def _list_sheet_names(self, engine: str) -> List[str]:
        """Return the worksheet names of ``self.file_path`` without loading data."""
        if engine == 'openpyxl':
            import openpyxl
            workbook = openpyxl.load_workbook(self.file_path, read_only=True)
            try:
                return list(workbook.sheetnames)
            finally:
                workbook.close()
        if engine == 'xlrd':
            import xlrd
            workbook = xlrd.open_workbook(self.file_path, on_demand=True)
            try:
                return list(workbook.sheet_names())
            finally:
                workbook.release_resources()
        # Generic path for any other engine.
        with pd.ExcelFile(self.file_path, engine=engine) as excel_file:
            return list(excel_file.sheet_names)

    def _read_sheet(self, selected_sheet: str, engine: str) -> pd.DataFrame:
        """Read ``selected_sheet`` into a DataFrame.

        The header is probed first so that only the required columns are
        loaded. If the primary read fails, a plain ``pd.read_excel`` with
        automatic engine selection is tried.

        Raises:
            KeyError: The header probe worked and required columns are missing.
            RuntimeError: Both the primary and the fallback read failed.
        """
        read_excel_kwargs: Dict[str, Any] = {
            'io': self.file_path,
            'sheet_name': selected_sheet,
            'engine': engine,
            'dtype': 'object',   # read everything as objects; skips type inference
            'na_filter': False,  # no NA detection while reading (empty cells stay '')
        }
        required = list(getattr(self, 'required_columns', None) or [])
        if required:
            header_columns = None
            try:
                # Only the first row is read to obtain the column names.
                header_columns = pd.read_excel(
                    self.file_path,
                    sheet_name=selected_sheet,
                    engine=engine,
                    nrows=1,
                ).columns
            except Exception as e:
                # Best effort: fall back to reading every column.
                print(f"列检查失败,将读取所有列: {e}")
            if header_columns is not None:
                missing = [col for col in required if col not in header_columns]
                if missing:
                    # Fail before the (potentially slow) full read.
                    raise KeyError(f"缺少必要列: {missing}")
                read_excel_kwargs['usecols'] = required
                # Debug output of the effective read parameters.
                print("使用 read_excel_kwargs 读取excel:")
                for key, value in read_excel_kwargs.items():
                    print(f" {key}: {repr(value)}")

        self._print_stage("执行数据读取")
        try:
            return pd.read_excel(**read_excel_kwargs)
        except Exception as e:
            print(f"引擎 {engine} 读取失败,尝试备选引擎...\n{e}")
            try:
                # engine=None lets pandas choose the engine itself.
                return pd.read_excel(self.file_path, sheet_name=selected_sheet, engine=None)
            except Exception as fallback_e:
                raise RuntimeError(
                    f"读取 Excel 失败,工作表: '{selected_sheet}'\n"
                    f"主引擎错误: {type(e).__name__}: {e}\n"
                    f"备选引擎错误: {type(fallback_e).__name__}: {fallback_e}"
                ) from fallback_e

    def load_data(self) -> None:
        """Load the report worksheet of ``self.file_path`` into ``self.df``.

        The first existing sheet of "Merged All Tests" / "All Tests" is used.
        After loading, the lower/upper limit column names are resolved into
        ``self.col_lower`` / ``self.col_upper``.

        Raises:
            ValueError: No path set, unsupported extension, no matching sheet,
                or the sheet is empty.
            FileNotFoundError: The file does not exist.
            RuntimeError: The workbook cannot be opened or read.
            KeyError: Required columns are missing.
        """
        self._print_stage("加载数据")
        start_time = time.time()
        if not self.file_path:
            raise ValueError("文件路径未设置")
        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"文件不存在: {self.file_path}")

        # Pick the reader engine from the file extension.
        lowered_path = self.file_path.lower()
        if lowered_path.endswith('.xlsx'):
            engine = 'openpyxl'
        elif lowered_path.endswith('.xls'):
            engine = 'xlrd'
        else:
            raise ValueError("输入文件不是有效的 Excel 文件(应为 .xls 或 .xlsx 格式)")

        try:
            sheet_names = self._list_sheet_names(engine)
        except Exception as e:
            raise RuntimeError(
                f"无法打开 Excel 文件,请确认该文件未被损坏或占用。错误: {type(e).__name__}: {e}"
            ) from e

        # Sheets are tried in priority order.
        target_sheets = ["Merged All Tests", "All Tests"]
        selected_sheet = next((name for name in target_sheets if name in sheet_names), None)
        if selected_sheet is None:
            raise ValueError(
                f"未找到指定的工作表: {', '.join(target_sheets)}\n"
                f"当前文件包含的工作表有: {sheet_names}"
            )

        self.df = self._read_sheet(selected_sheet, engine)
        if self.df.empty:
            raise ValueError("工作表为空,无法处理")

        # Needed again here because the fallback read loads all columns
        # without the header probe having validated them.
        required = getattr(self, 'required_columns', None) or []
        missing_columns = [col for col in required if col not in self.df.columns]
        if missing_columns:
            raise KeyError(f"缺少必要列: {missing_columns}")

        self.col_lower = self._find_column_case_insensitive([
            "Lower Limit", "lower limit", "lower_limit", "ll", "lower"
        ])
        self.col_upper = self._find_column_case_insensitive([
            "Upper Limit", "upper limit", "upper_limit", "ul", "upper"
        ])

        loading_time = time.time() - start_time
        print(f"数据加载完成: {len(self.df)} 行 × {self.df.shape[1]} 列")
        print(f"使用引擎: {engine}")
        print(f"耗时: {loading_time:.2f}s")
        print(f"检测到下限列: {self.col_lower or '无'}")
        print(f"检测到上限列: {self.col_upper or '无'}")

    def _convert_data_types(self):
        """Optionally coerce columns to numeric/datetime based on their names.

        Columns whose lower-cased name contains a numeric pattern are passed
        through ``pd.to_numeric``; otherwise columns containing a date pattern
        are passed through ``pd.to_datetime``. Unparseable values become
        NaN/NaT. Not called by the default workflow.
        """
        if self.df is None or self.df.empty:
            return
        numeric_patterns = ['limit', 'value', 'measure', 'result', 'score']
        date_patterns = ['date', 'time', 'period']
        for col in self.df.columns:
            col_lower = str(col).lower()
            if any(pattern in col_lower for pattern in numeric_patterns):
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
            elif any(pattern in col_lower for pattern in date_patterns):
                self.df[col] = pd.to_datetime(self.df[col], errors='coerce')

    def get_keyword(self) -> Tuple[pd.DataFrame, str, List[str]]:
        """Ask for a keyword and filter the data on ``Test Name New``.

        The keyword is matched as a literal, case-insensitive substring.

        Returns:
            ``(filtered_df, keyword, unique_tests)``. An empty DataFrame and an
            empty list are returned when no data is available or the user
            chooses to leave; ``keyword`` is ``""`` when the user chooses to
            use all data.
        """
        self._print_stage("筛选关键词")
        while True:
            keyword = input("请输入筛选关键词(匹配 'Test Name New'): ").strip()
            if not keyword:
                print("❌ 关键词不能为空,请重新输入")
                continue
            if self.df is None or self.df.empty:
                print("⚠️ 数据框为空,无法进行筛选")
                return pd.DataFrame(), keyword, []
            if "Test Name New" not in self.df.columns:
                print("❌ 列 'Test Name New' 不存在于数据框中")
                print(f"可用列: {list(self.df.columns)}")
                return pd.DataFrame(), keyword, []

            try:
                test_names = self.df["Test Name New"].astype(str)
                # regex=False: characters such as "(", "[" or "+" in the
                # keyword must be matched literally, not parsed as a pattern.
                mask = test_names.str.contains(keyword, case=False, na=False, regex=False)
                filtered_df = self.df.loc[mask].copy()
            except Exception as e:
                print(f"❌ 筛选过程中发生错误: {e}")
                print("请检查数据格式或重新输入关键词")
                continue

            if not filtered_df.empty:
                unique_tests = filtered_df["Test Name New"].unique().tolist()
                print(f"✅ 匹配到 {len(filtered_df)} 行数据,涉及 {len(unique_tests)} 个不同测试项")
                return filtered_df, keyword, unique_tests

            # No match: show a few available names and let the user decide.
            print(f"⚠️ 没有找到包含关键词 '{keyword}' 的测试项")
            available_tests = self.df["Test Name New"].dropna().unique()
            if len(available_tests) > 0:
                print("📋 可用的测试项示例:")
                for test in available_tests[:5]:
                    print(f" - {test}")
                if len(available_tests) > 5:
                    print(f" ... 还有 {len(available_tests) - 5} 个测试项")
            choice = input("请选择: 1-重新输入关键词 2-使用所有数据 3-退出当前操作: ").strip()
            if choice == "1":
                continue
            if choice == "2":
                filtered_df = self.df.copy()
                unique_tests = filtered_df["Test Name New"].unique().tolist()
                print(f"✅ 使用所有数据: {len(filtered_df)} 行,{len(unique_tests)} 个测试项")
                return filtered_df, "", unique_tests
            print("👋 退出筛选操作")
            return pd.DataFrame(), keyword, []

    def create_output_dir(self, keyword) -> None:
        """Create the output directory and choose the report file name.

        The directory ``scatter_report_out`` is created next to the input
        file. ``self.html_report_path`` is set to
        ``<keyword>_report_<timestamp>.html`` inside it, where the keyword is
        sanitised for use in a file name (``all`` when it is empty).

        Raises:
            ValueError: ``self.file_path`` is not set.
        """
        self._print_stage("创建输出目录")
        if not self.file_path:
            raise ValueError("文件路径未设置")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_dir = os.path.dirname(self.file_path)
        self.output_dir = os.path.join(base_dir, "scatter_report_out")
        # The keyword is free user input and may contain path separators or
        # other characters that are invalid in file names.
        report_label = self._safe_filename(keyword) if keyword else "all"
        self.html_report_path = os.path.join(
            self.output_dir, f"{report_label}_report_{timestamp}.html"
        )
        os.makedirs(self.output_dir, exist_ok=True)
        print(f"输出目录: {self.output_dir}")

    @staticmethod
    def _safe_filename(name: str) -> str:
        """Return ``name`` reduced to alphanumerics, spaces, ``_`` and ``-``.

        Falls back to ``"Unknown_Test"`` when nothing is left.
        """
        allowed_extra = (" ", "_", "-")
        safe = "".join(c for c in str(name) if c.isalnum() or c in allowed_extra).strip()
        return safe or "Unknown_Test"

    def _unique_limit_values(self, frame: pd.DataFrame, column: Optional[str]) -> List[float]:
        """Return the sorted distinct numeric values of ``column`` (``[]`` if absent)."""
        if not column or column not in frame.columns:
            return []
        values = self._clean_and_convert_series(frame[column], 'numeric').dropna().unique()
        return sorted(values.tolist())

    def _extract_limits(self, df_one_test: pd.DataFrame) -> Tuple[
            Optional[float], Optional[float], List[float], List[float]]:
        """Extract the lower/upper limits of one test item.

        Returns:
            ``(lower_plot, upper_plot, lower_set, upper_set)`` where the sets
            are the sorted distinct limit values, ``lower_plot`` is the
            smallest lower limit and ``upper_plot`` the largest upper limit
            (``None`` when no numeric value exists).
        """
        lower_set = self._unique_limit_values(df_one_test, self.col_lower)
        upper_set = self._unique_limit_values(df_one_test, self.col_upper)
        lower_plot = lower_set[0] if lower_set else None
        upper_plot = upper_set[-1] if upper_set else None
        return lower_plot, upper_plot, lower_set, upper_set

    @staticmethod
    def _clean_and_convert_series(series: pd.Series, target_type: str = 'numeric') -> pd.Series:
        """Convert ``series`` to floats (``'numeric'``) or datetimes (``'datetime'``).

        For numeric conversion of non-numeric data, commas and spaces are
        removed before parsing; unparseable values become NaN. Empty input and
        unknown ``target_type`` values are returned unchanged.
        """
        if series.empty:
            return series
        if target_type == 'numeric':
            if pd.api.types.is_numeric_dtype(series):
                return series.astype(float)
            cleaned = series.astype(str).str.replace(r'[, ]', '', regex=True).str.strip()
            return pd.to_numeric(cleaned, errors='coerce')
        if target_type == 'datetime':
            return TestReportScatterPlotter._convert_to_datetime(series)
        return series

    @staticmethod
    def _convert_to_datetime(series: pd.Series) -> pd.Series:
        """Convert a series of mixed timestamp representations to datetimes.

        Numeric values are interpreted by magnitude: ``>= 1e11`` as epoch
        milliseconds, ``[1e9, 1e11)`` as epoch seconds and ``(20000, 60000)``
        as day counts from 1899-12-30. Remaining values are parsed as strings,
        first with the ``YYYY-MM-DD HH-MM-SS`` format and then with pandas'
        generic parser. Unparseable values become NaT.
        """
        if pd.api.types.is_datetime64_any_dtype(series):
            return series

        numeric_series = pd.to_numeric(series, errors='coerce')
        string_series = series.astype(str).str.strip()
        result = pd.Series(pd.NaT, index=series.index, dtype='datetime64[ns]')

        # Numeric timestamps; NaN compares False, so non-numeric rows are skipped.
        ms_mask = numeric_series >= 1e11
        if ms_mask.any():
            result.loc[ms_mask] = pd.to_datetime(numeric_series.loc[ms_mask], unit='ms')
        s_mask = (numeric_series >= 1e9) & (numeric_series < 1e11)
        if s_mask.any():
            result.loc[s_mask] = pd.to_datetime(numeric_series.loc[s_mask], unit='s')
        excel_mask = (numeric_series > 20000) & (numeric_series < 60000)
        if excel_mask.any():
            origin = pd.Timestamp('1899-12-30')
            result.loc[excel_mask] = origin + pd.to_timedelta(
                numeric_series.loc[excel_mask], unit='D'
            )

        # String timestamps for everything that is still unresolved.
        remaining_mask = result.isna()
        if remaining_mask.any():
            format_patterns = [
                (r'^\d{4}-\d{2}-\d{2} \d{2}-\d{2}-\d{2}$', '%Y-%m-%d %H-%M-%S'),
            ]
            for pattern, date_format in format_patterns:
                # Full-length boolean mask, so assignment is positional and
                # does not depend on index labels being unique.
                format_mask = remaining_mask & string_series.str.match(pattern)
                if format_mask.any():
                    result.loc[format_mask] = pd.to_datetime(
                        string_series.loc[format_mask], format=date_format, errors='coerce'
                    )
            still_na_mask = result.isna() & remaining_mask
            if still_na_mask.any():
                result.loc[still_na_mask] = pd.to_datetime(
                    string_series.loc[still_na_mask], errors='coerce'
                )
        return result

    def _preprocess_test_data(self, test_data: pd.DataFrame) -> pd.DataFrame:
        """Return the valid rows of ``test_data`` sorted by test time.

        Adds ``Measurement_num`` (float) and ``TestTime_dt`` (datetime)
        columns, drops rows where either could not be parsed and sorts by
        ``TestTime_dt``. The input frame is not modified.
        """
        prepared = test_data.assign(
            Measurement_num=self._clean_and_convert_series(test_data['Measurement'], 'numeric'),
            TestTime_dt=self._clean_and_convert_series(test_data['Test Time'], 'datetime'),
        )
        valid_data = prepared.dropna(subset=['Measurement_num', 'TestTime_dt'])
        return valid_data.sort_values('TestTime_dt')

    def _calculate_statistics(self, y_data: pd.Series) -> Dict[str, float]:
        """Return count, mean, median, min, max, std, q1 and q3 of ``y_data``.

        Values are plain Python numbers; ``std`` is NaN for fewer than two
        data points.
        """
        return {
            'count': int(len(y_data)),
            'mean': float(y_data.mean()),
            'median': float(y_data.median()),
            'min': float(y_data.min()),
            'max': float(y_data.max()),
            'std': float(y_data.std()),
            'q1': float(y_data.quantile(0.25)),
            'q3': float(y_data.quantile(0.75)),
        }

    def _plot_to_base64(self, fig) -> str:
        """Render ``fig`` as PNG, return it base64-encoded and close the figure."""
        buf = BytesIO()
        try:
            fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, even when rendering fails.
            plt.close(fig)
        return base64.b64encode(buf.getvalue()).decode('utf-8')

    def _decorate_axes(self, ax: Any, x_values: pd.Series, y_values: pd.Series, title: str,
                       lower_plot: Optional[float], upper_plot: Optional[float]) -> None:
        """Draw limit/mean/median lines and set title, labels, grid and ticks."""
        stats = self._calculate_statistics(y_values)
        x_min, x_max = x_values.min(), x_values.max()
        if lower_plot is not None:
            ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=1.2, label="Lower Limit")
        if upper_plot is not None:
            ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=1.2, label="Upper Limit")
        ax.hlines(y=stats['mean'], xmin=x_min, xmax=x_max, colors='orange',
                  linestyles='-', linewidth=1.5, alpha=0.7, label='Mean')
        ax.hlines(y=stats['median'], xmin=x_min, xmax=x_max, colors='purple',
                  linestyles='-.', linewidth=1.5, alpha=0.7, label='Median')
        ax.set_title(title)
        ax.set_xlabel("Test Time")
        ax.set_ylabel("Measurement Value")
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='x', rotation=45)

    def _create_summary_plot(self, test_data: pd.DataFrame, test_name: str,
                             lower_plot: Optional[float], upper_plot: Optional[float]) -> str:
        """Create the summary scatter plot (all SNs in one figure).

        Returns the plot as a base64-encoded PNG string.
        """
        fig, ax = plt.subplots(figsize=(12, 8))
        if "SN" in test_data.columns:
            groups = list(test_data.groupby("SN"))
        else:
            groups = [("Unknown_SN", test_data)]
        # One scatter series (and legend entry) per serial number.
        for sn, group in groups:
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       label=str(sn), alpha=0.7, s=25)
        self._decorate_axes(ax, test_data['TestTime_dt'], test_data['Measurement_num'],
                            f"汇总图 - {test_name}", lower_plot, upper_plot)
        # Legend is anchored outside the axes on the right.
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        return self._plot_to_base64(fig)

    def _create_sn_plots(self, test_data: pd.DataFrame, test_name: str,
                         lower_plot: Optional[float], upper_plot: Optional[float]) -> List[Dict[str, str]]:
        """Create one scatter plot per SN.

        Returns a list of ``{"sn": <serial>, "image": <base64 PNG>}`` dicts;
        empty when the data has no ``SN`` column.
        """
        sn_plots: List[Dict[str, str]] = []
        if "SN" not in test_data.columns:
            return sn_plots
        for sn, group in test_data.groupby("SN"):
            if group.empty:
                continue
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       color='blue', alpha=0.7, s=30, label=f"SN: {sn}")
            # Mean/median lines are computed from this SN's data only.
            self._decorate_axes(ax, group['TestTime_dt'], group['Measurement_num'],
                                f"SN独立图 - {test_name} (SN: {sn})", lower_plot, upper_plot)
            ax.legend()
            sn_plots.append({"sn": str(sn), "image": self._plot_to_base64(fig)})
        return sn_plots

    def _determine_test_status(self, stats: Dict[str, float],
                               lower_limit: Optional[float],
                               upper_limit: Optional[float]) -> Dict[str, Any]:
        """Classify a test as ``success``, ``warning`` or ``danger``.

        * ``danger``: the minimum is below the lower limit or the maximum is
          above the upper limit. Each limit is checked on its own, so a test
          with only one limit is still evaluated.
        * ``warning``: the mean lies within 10 % of a limit's magnitude on the
          inside of that limit, or (with both limits) the standard deviation
          exceeds 20 % of the limit range.
        * ``success``: otherwise, including when no limits are defined.

        Returns:
            ``{"status": ..., "status_display": ...}``.
        """
        out_of_limits = (
            (lower_limit is not None and stats['min'] < lower_limit)
            or (upper_limit is not None and stats['max'] > upper_limit)
        )
        if out_of_limits:
            return {"status": "danger", "status_display": "异常"}

        # abs() keeps the margin on the inside of the limit for negative
        # limits; for positive limits this equals lower*1.1 and upper*0.9.
        near_lower = (lower_limit is not None
                      and stats['mean'] < lower_limit + abs(lower_limit) * 0.1)
        near_upper = (upper_limit is not None
                      and stats['mean'] > upper_limit - abs(upper_limit) * 0.1)
        too_spread = (lower_limit is not None and upper_limit is not None
                      and stats['std'] > (upper_limit - lower_limit) * 0.2)
        if near_lower or near_upper or too_spread:
            return {"status": "warning", "status_display": "警告"}
        return {"status": "success", "status_display": "正常"}

    def generate_html_report(self, filtered_df: pd.DataFrame, keyword: str,
                             unique_tests: List[str]) -> None:
        """Render the HTML report for ``unique_tests`` and write it to disk.

        For every test name the rows are preprocessed, statistics and limits
        are computed, the summary and per-SN plots are rendered and a status
        is assigned. Tests without valid rows are skipped. The result is
        written to ``self.html_report_path``.

        Raises:
            ValueError: ``self.html_report_path`` is not set.
        """
        self._print_stage("生成HTML报告")
        if not self.html_report_path:
            raise ValueError("报告输出路径未设置")
        start_time = time.time()
        test_results = []
        total_points = 0
        status_counts = {"success": 0, "warning": 0, "danger": 0}

        for i, test_name in enumerate(unique_tests, 1):
            self._print_progress(i, len(unique_tests), "生成测试报告")
            rows = filtered_df[filtered_df["Test Name New"] == test_name]
            test_data = self._preprocess_test_data(rows)
            if test_data.empty:
                continue
            lower_plot, upper_plot, _, _ = self._extract_limits(test_data)
            stats = self._calculate_statistics(test_data['Measurement_num'])
            total_points += stats['count']
            summary_plot_image = self._create_summary_plot(test_data, test_name, lower_plot, upper_plot)
            sn_plot_images = self._create_sn_plots(test_data, test_name, lower_plot, upper_plot)
            status_info = self._determine_test_status(stats, lower_plot, upper_plot)
            status_counts[status_info["status"]] += 1
            test_results.append({
                "name": test_name,
                "stats": stats,
                "limits": {"lower": lower_plot, "upper": upper_plot},
                "summary_plot_image": summary_plot_image,
                "sn_plot_images": sn_plot_images,
                "status": status_info["status"],
                "status_display": status_info["status_display"],
            })

        # autoescape: test names, SNs, the keyword and the file path come from
        # user data and must not be interpreted as HTML markup.
        template = Template(HTML_TEMPLATE, autoescape=True)
        html_content = template.render(
            keyword=keyword,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            test_count=len(test_results),
            total_points=total_points,
            tests=test_results,
            file_path=self.file_path,
            analysis_time=round(time.time() - start_time, 2),
            status_counts={
                "normal": status_counts["success"],
                "warning": status_counts["warning"],
                "abnormal": status_counts["danger"],
            },
        )
        with open(self.html_report_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"\nHTML报告已生成: {self.html_report_path}")
        print(f"共处理 {len(test_results)} 个测试项,{total_points} 个数据点")

    def run(self) -> None:
        """Run the interactive workflow.

        Loads the workbook once, then generates one report per entered
        keyword until a selection yields no data (for example the user chose
        to leave) or the user presses Ctrl-C. Any other error is printed with
        its traceback and the process exits with status 1.
        """
        try:
            self.get_file_path()
            self.load_data()
            while True:
                filtered_df, keyword, unique_tests = self.get_keyword()
                if filtered_df.empty or not unique_tests:
                    # Nothing selected: stop instead of writing an empty
                    # report and prompting forever.
                    print("未选择任何数据,结束分析")
                    break
                self.create_output_dir(keyword)
                self.generate_html_report(filtered_df, keyword, unique_tests)
                print(f"\n✅ 分析完成!")
        except KeyboardInterrupt:
            print(f"\n{Fore.YELLOW}⚠ 用户中断程序{Style.RESET_ALL}")
        except Exception as e:
            print(f"\n❌ 发生错误: {type(e).__name__}: {str(e)}")
            import traceback
            traceback.print_exc()
            sys.exit(1)
if __name__ == "__main__":
    # colorama has to be initialised once so that the ANSI colour codes
    # emitted through ``Fore`` are translated on Windows consoles.
    init()
    plotter = TestReportScatterPlotter()
    plotter.run()