Compare commits

...

2 Commits

Author SHA1 Message Date
42c96feca3 更新优化点 2026-02-24 11:10:06 +08:00
5b237dc848 更新脚本优化点 2026-02-24 11:09:48 +08:00
3 changed files with 396 additions and 359 deletions

View File

@@ -70,6 +70,7 @@ class LogManager:
f.write("\n=== Cell统计 ===\n")
cell_stats = statistics_data['cell_statistics']
f.write(f"涉及Cell总数: {len(cell_stats)}\n")
f.write(f"{"="*30}\n")
# 按Cell编号排序
sorted_cells = sorted(cell_stats.keys(), key=lambda x: int(x) if x.isdigit() else x)
@@ -99,15 +100,16 @@ class LogManager:
fail_items.append(f"{messageappend})")
# 写入Cell统计信息
f.write(f"Cell {cell}: 文件数={file_count}, SN{sn_list}, FAIL数={fail_count}\n")
f.write(f" Cell {cell}: 文件数={file_count}, SN{sn_list}, FAIL数={fail_count}\n")
# 如果有失败项,写入失败详情
if fail_items:
f.write(f" FAIL项:\n{'\n'.join(fail_items)}\n")
f.write(f" FAIL项:\n ->{'\n ->'.join(fail_items)}\n")
# SN统计优化后的格式
if statistics_data.get('sn_statistics'):
f.write("\n=== SN统计 ===\n")
f.write(f"{"=" * 30}\n")
sn_stats = statistics_data['sn_statistics']
# 按SN排序按字母顺序
@@ -138,11 +140,11 @@ class LogManager:
fail_items.append(f"{messageappend})")
# 写入SN统计信息
f.write(f"SN {sn}: 文件数={file_count}, Cell数={len(cells)}, FAIL数={fail_count}\n")
f.write(f" SN {sn}: 文件数={file_count}, Cell数={len(cells)}, FAIL数={fail_count}\n")
# 如果有失败项,写入失败详情
if fail_items:
f.write(f" FAIL项:\n{'\n'.join(fail_items)}\n")
f.write(f" FAIL项:\n ->{'\n ->'.join(fail_items)}\n")
# 失败项统计
if statistics_data.get('failure_details'):

View File

@@ -30,4 +30,5 @@ htmlReportProcess_Merge_cmd_V2.py
htmlReportProcess_Merge.py
htmlReportProcess_picHtml_1kV2.py
htmlReportProcess_picHtml_2kV2.py
htmlReportProcess_picHtml_2kV2.py
htmlReportProcess_picHtml_2kV3.py

View File

@@ -4,6 +4,7 @@ import sys
import time
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from matplotlib.lines import Line2D
from typing import Optional, Tuple, List, Dict, Any, Union
@@ -36,13 +37,14 @@ OPTIMIZATION_CONFIG = {
'memory_limit_gb': psutil.virtual_memory().available // (1024 ** 3) * 0.7, # 内存限制
}
# HTML模板 - 增加汇总对比报告部分
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>测试报告分析 - {{ keywords_display }}</title>
<title>测试报告分析 - {{ keyword }}</title>
<style>
/* 样式保持不变,根据需要调整 */
body {
@@ -116,22 +118,20 @@ HTML_TEMPLATE = """
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
.comparison-plots-container {
.sn-plots-container {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 20px;
margin: 20px 0;
}
.comparison-plot-item {
background: white;
border: 1px solid #e0e0e0;
border-radius: 8px;
.sn-plot-item {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
text-align: center;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
.comparison-plot-title {
font-size: 16px;
.sn-plot-title {
font-size: 14px;
font-weight: bold;
margin-bottom: 10px;
color: #555;
@@ -187,27 +187,48 @@ HTML_TEMPLATE = """
border-radius: 6px;
margin: 10px 0;
}
.keyword-tag {
display: inline-block;
background: #e1f5fe;
color: #01579b;
padding: 4px 8px;
border-radius: 4px;
margin: 2px;
font-size: 12px;
.comparison-section {
background: white;
border-radius: 10px;
padding: 20px;
margin-top: 30px;
}
.comparison-plots {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
gap: 20px;
margin: 20px 0;
}
.keyword-group {
background: #f0f8ff;
border-left: 4px solid #4169e1;
padding: 15px;
margin: 20px 0;
border-radius: 8px;
}
.keyword-title {
font-size: 18px;
font-weight: bold;
color: #4169e1;
margin-bottom: 10px;
}
.sub-test-card {
background: #f8f9fa;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
border-left: 3px solid #667eea;
}
</style>
</head>
<body>
<div class="header">
<h1>📊 测试报告分析 (多文件合并)</h1>
<p>关键词:
{% for keyword in keywords %}
<span class="keyword-tag">{{ keyword }}</span>
{% endfor %}
</p>
<p>生成时间: {{ timestamp }}</p>
<p>关键词: <strong>{{ keyword }}</strong> | 生成时间: {{ timestamp }}</p>
<p>共分析 {{ test_count }} 个测试项,{{ total_points }} 个数据点,来自 {{ file_count }} 个文件</p>
{% if keyword_groups|length > 1 %}
<p>关键词分组: {{ keyword_groups|length }} 组</p>
{% endif %}
</div>
<div class="summary">
@@ -221,98 +242,114 @@ HTML_TEMPLATE = """
{% endfor %}
</div>
<!-- 关键词对比图 - 移动到更显眼的位置 -->
{% if comparison_plot_images and comparison_plot_images|length > 0 %}
<div class="test-card">
<div class="section-title">🔍 关键词对比视图</div>
<div class="comparison-plots-container">
{% for comparison_plot in comparison_plot_images %}
<div class="comparison-plot-item">
<div class="comparison-plot-title">{{ comparison_plot.title }}</div>
<img src="data:image/png;base64,{{ comparison_plot.image }}"
alt="{{ comparison_plot.title }}"
class="plot-image"
style="max-width: 100%; height: auto;">
<!-- 关键词分组显示测试项 -->
{% for group_name, group_tests in keyword_groups.items() %}
<div class="keyword-group">
<div class="keyword-title">🔍 关键词组: {{ group_name }}</div>
<!-- 显示该关键词组下的所有测试项 -->
{% for test_name, test_list in group_tests.items() %}
<div class="sub-test-card">
<div class="test-header">
<div class="test-title">📋 {{ test_name }}</div>
{% if test_list[0].status %}
<div class="badge badge-{{ test_list[0].status }}">
{{ test_list[0].status_display }}
</div>
{% endif %}
</div>
{% for test in test_list %}
<div class="test-stats">
<div class="stat-item">
<div class="stat-label">数据点数</div>
<div class="stat-value">{{ test.stats.count }}</div>
</div>
<div class="stat-item">
<div class="stat-label">平均值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.mean) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">中位数</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.median) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">标准差</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.std) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">最小值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.min) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">最大值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.max) }}</div>
</div>
</div>
{% if test.limits.lower is not none or test.limits.upper is not none %}
<div class="test-stats">
{% if test.limits.lower is not none %}
<div class="stat-item">
<div class="stat-label">下限值</div>
<div class="stat-value">{{ "%.4f"|format(test.limits.lower) }}</div>
</div>
{% endif %}
{% if test.limits.upper is not none %}
<div class="stat-item">
<div class="stat-label">上限值</div>
<div class="stat-value">{{ "%.4f"|format(test.limits.upper) }}</div>
</div>
{% endif %}
</div>
{% endif %}
<!-- 汇总图 -->
<div class="section-title">📈 汇总视图 (所有SN)</div>
<div class="plot-container">
<img src="data:image/png;base64,{{ test.summary_plot_image }}" alt="{{ test.name }} 汇总散点图" class="plot-image">
</div>
<!-- SN独立图 -->
{% if test.sn_plot_images %}
<div class="section-title">🔍 SN独立视图 ({{ test.sn_plot_images|length }}个SN)</div>
<div class="sn-plots-container">
{% for sn_plot in test.sn_plot_images %}
<div class="sn-plot-item">
<div class="sn-plot-title">SN: {{ sn_plot.sn }}</div>
<img src="data:image/png;base64,{{ sn_plot.image }}" alt="{{ test.name }} - SN {{ sn_plot.sn }} 散点图" class="plot-image">
</div>
{% endfor %}
</div>
{% endif %}
{% if not loop.last %}
<hr style="margin: 20px 0; border: none; border-top: 1px solid #e0e0e0;">
{% endif %}
{% endfor %}
</div>
{% endfor %}
</div>
{% endfor %}
<!-- 汇总对比报告 -->
{% if comparison_plots %}
<div class="comparison-section">
<h2>📊 汇总对比报告</h2>
<div class="comparison-plots">
{% for plot in comparison_plots %}
<div class="plot-container">
<h3>{{ plot.title }}</h3>
<img src="data:image/png;base64,{{ plot.image }}" alt="{{ plot.title }}" class="plot-image">
{% if plot.description %}
<p>{{ plot.description }}</p>
{% endif %}
</div>
{% endfor %}
</div>
</div>
{% endif %}
{% for test in tests %}
<div class="test-card">
<div class="test-header">
<div class="test-title">📋 {{ test.name }}</div>
<div class="badge badge-{{ test.status }}">
{{ test.status_display }}
</div>
</div>
<div class="test-stats">
<div class="stat-item">
<div class="stat-label">数据点数</div>
<div class="stat-value">{{ test.stats.count }}</div>
</div>
<div class="stat-item">
<div class="stat-label">平均值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.mean) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">中位数</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.median) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">标准差</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.std) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">最小值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.min) }}</div>
</div>
<div class="stat-item">
<div class="stat-label">最大值</div>
<div class="stat-value">{{ "%.4f"|format(test.stats.max) }}</div>
</div>
</div>
{% if test.limits.lower is not none or test.limits.upper is not none %}
<div class="test-stats">
{% if test.limits.lower is not none %}
<div class="stat-item">
<div class="stat-label">下限值</div>
<div class="stat-value">{{ "%.4f"|format(test.limits.lower) }}</div>
</div>
{% endif %}
{% if test.limits.upper is not none %}
<div class="stat-item">
<div class="stat-label">上限值</div>
<div class="stat-value">{{ "%.4f"|format(test.limits.upper) }}</div>
</div>
{% endif %}
</div>
{% endif %}
<!-- 汇总图 -->
<div class="section-title">📈 汇总视图 (所有SN)</div>
<div class="plot-container">
<img src="data:image/png;base64,{{ test.summary_plot_image }}" alt="{{ test.name }} 汇总散点图" class="plot-image">
</div>
<!-- SN独立图 -->
{% if test.sn_plot_images %}
<div class="section-title">🔍 SN独立视图 ({{ test.sn_plot_images|length }}个SN)</div>
<div class="comparison-plots-container">
{% for sn_plot in test.sn_plot_images %}
<div class="comparison-plot-item">
<div class="comparison-plot-title">SN: {{ sn_plot.sn }}</div>
<img src="data:image/png;base64,{{ sn_plot.image }}" alt="{{ test.name }} - SN {{ sn_plot.sn }} 散点图" class="plot-image">
</div>
{% endfor %}
</div>
{% endif %}
</div>
{% endfor %}
<div class="summary">
<h3>📈 分析摘要</h3>
<div class="summary-item">
@@ -322,10 +359,12 @@ HTML_TEMPLATE = """
<strong>分析时间:</strong> {{ analysis_time }}秒
</div>
<div class="summary-item">
<strong>关键词:</strong>
{% for keyword in keywords %}
<span class="keyword-tag">{{ keyword }}</span>
{% endfor %}
<strong>关键词分组:</strong> {{ keyword_groups|length }} 组
<ul>
{% for group_name, group_tests in keyword_groups.items() %}
<li>{{ group_name }}: {{ group_tests|length }} 个测试项</li>
{% endfor %}
</ul>
</div>
<div class="summary-item">
<strong>测试项分布:</strong>
@@ -363,6 +402,7 @@ class MultiFileTestReportScatterPlotter:
self.col_lower: Optional[str] = None
self.col_upper: Optional[str] = None
self.html_report_path: Optional[str] = None
self.comparison_report_path: Optional[str] = None
self.file_infos: List[Dict[str, Any]] = []
# 缓存处理过的数据
@@ -517,8 +557,6 @@ class MultiFileTestReportScatterPlotter:
self._print_error(f"加载文件失败 {filename}: {e}")
return None
def _find_column_case_insensitive(self, candidates: List[str]) -> Optional[str]:
"""优化的大小写不敏感列查找"""
if self.df is None:
@@ -608,7 +646,7 @@ class MultiFileTestReportScatterPlotter:
else:
# 顺序加载
for i, file_info in enumerate(file_infos, 1):
self._print_progress(i, len(excel_files), "加载文件")
self._print_progress(i, len(file_infos), "加载文件")
df = self._load_single_file_optimized(file_info)
if df is not None:
all_dataframes.append(df)
@@ -649,47 +687,50 @@ class MultiFileTestReportScatterPlotter:
self.col_upper = self._find_column_case_insensitive([
"Upper Limit", "upper limit", "upper_limit", "ul", "upper"
])
def get_keywords(self) -> Tuple[pd.DataFrame, List[str], List[str]]:
"""获取用户输入的关键词并筛选数据 - 修改为支持多个关键词"""
def get_keywords(self) -> Tuple[pd.DataFrame, str, List[str], Dict[str, List[str]]]:
"""获取用户输入的多个关键词并筛选数据,返回关键词分组信息"""
self._print_stage("筛选关键词")
while True:
keyword_input = input("请输入一个或多个关键词(用逗号分隔,匹配 'Test Name New'): ").strip()
keyword_input = input("请输入筛选关键词(多个关键词用','分割,匹配 'Test Name New'): ").strip()
if not keyword_input:
print("❌ 关键词不能为空,请重新输入")
continue
# 分割关键词
keywords = [k.strip() for k in keyword_input.split(',') if k.strip()]
if not keywords:
print("❌ 没有有效的关键词,请重新输入")
continue
# 检查数据框是否为空
if self.df is None or self.df.empty:
if self.df.empty:
print("⚠️ 数据框为空,无法进行筛选")
return pd.DataFrame(), keywords, []
return pd.DataFrame(), keyword_input, [], {}
# 检查列是否存在
if "Test Name New" not in self.df.columns:
print("❌ 列 'Test Name New' 不存在于数据框中")
print(f"可用列: {list(self.df.columns)}")
return pd.DataFrame(), keywords, []
return pd.DataFrame(), keyword_input, [], {}
try:
# 创建多个关键词的筛选条件
# 分割关键词
keywords = [k.strip() for k in keyword_input.split(',') if k.strip()]
# 构建多个关键词的筛选条件
mask = pd.Series(False, index=self.df.index)
keyword_groups = {}
for keyword in keywords:
keyword_mask = self.df["Test Name New"].astype(str).str.contains(keyword, case=False, na=False)
mask = mask | keyword_mask
# 记录每个关键词匹配的测试项
matched_tests = self.df.loc[keyword_mask, "Test Name New"].unique().tolist()
keyword_groups[keyword] = matched_tests
filtered_df = self.df.loc[mask].copy()
if filtered_df.empty:
# 提供友好的提示和建议
print(f"⚠️ 没有找到包含关键词 '{', '.join(keywords)}' 的测试项")
print(f"⚠️ 没有找到包含关键词 '{keyword_input}' 的测试项")
# 显示部分可用的测试项作为参考
available_tests = self.df["Test Name New"].dropna().unique()
@@ -707,16 +748,21 @@ class MultiFileTestReportScatterPlotter:
elif choice == "2":
filtered_df = self.df.copy()
unique_tests = filtered_df["Test Name New"].unique().tolist()
# 为所有数据创建一个默认分组
keyword_groups = {"所有测试项": unique_tests}
print(f"✅ 使用所有数据: {len(filtered_df)} 行,{len(unique_tests)} 个测试项")
return filtered_df, keywords, unique_tests
return filtered_df, "", unique_tests, keyword_groups
else:
print("👋 退出筛选操作")
return pd.DataFrame(), keywords, []
return pd.DataFrame(), keyword_input, [], {}
else:
unique_tests = filtered_df["Test Name New"].unique().tolist()
print(f"✅ 匹配到 {len(filtered_df)} 行数据,涉及 {len(unique_tests)} 个不同测试项")
print(f" 使用的关键词: {', '.join(keywords)}")
return filtered_df, keywords, unique_tests
print(f"📊 关键词分组: {len(keyword_groups)}")
for keyword, tests in keyword_groups.items():
print(f" - '{keyword}': {len(tests)} 个测试项")
return filtered_df, keyword_input, unique_tests, keyword_groups
except Exception as e:
print(f"❌ 筛选过程中发生错误: {e}")
@@ -735,14 +781,17 @@ class MultiFileTestReportScatterPlotter:
safe_keyword = self._safe_filename(keyword) if keyword else "all_data"
self.html_report_path = os.path.join(self.output_dir, f"{safe_keyword}_report_{timestamp}.html")
# 为汇总对比报告创建单独的文件路径
self.comparison_report_path = os.path.join(self.output_dir,
f"{safe_keyword}_comparison_report_{timestamp}.html")
os.makedirs(self.output_dir, exist_ok=True)
print(f"输出目录: {self.output_dir}")
@staticmethod
def _safe_filename(name: str) -> str:
"""生成安全的文件名"""
safe = "".join(c for c in str(name) if c.isalnum() or c in (" ", "_", "-")).strip()
safe = "".join(c for c in str(name) if c.isalnum() or c in (","," ", "_", "-")).strip()
return safe or "Unknown_Test"
def _extract_limits(self, df_one_test: pd.DataFrame) -> Tuple[
@@ -766,10 +815,9 @@ class MultiFileTestReportScatterPlotter:
return lower_plot, upper_plot, lower_set, upper_set
@staticmethod
def _clean_and_convert_series(series: pd.Series, target_type: str = 'numeric') -> pd.Series:
"""统一的系列清洗和转换方法 - 修复了 ast 方法名错误"""
"""统一的系列清洗和转换方法"""
if series.empty:
return series
@@ -778,7 +826,7 @@ class MultiFileTestReportScatterPlotter:
if pd.api.types.is_numeric_dtype(series):
return series.astype(float)
# 批量字符串处理 - 修复这里的问题
# 批量字符串处理
cleaned = series.astype(str).str.replace(r'[, ]', '', regex=True).str.strip()
return pd.to_numeric(cleaned, errors='coerce')
@@ -787,7 +835,6 @@ class MultiFileTestReportScatterPlotter:
return series
@staticmethod
def _convert_to_datetime(series: pd.Series) -> pd.Series:
"""优化的日期时间转换"""
@@ -877,25 +924,6 @@ class MultiFileTestReportScatterPlotter:
valid_data = test_data.dropna(subset=['Measurement_num', 'TestTime_dt'])
return valid_data.sort_values('TestTime_dt')
def _preprocess_data(self, test_data: pd.DataFrame) -> pd.DataFrame:
    """Prepare rows for the comparison plots (simplified preprocessing).

    Works on a copy of *test_data*, so the caller's frame is not modified.
    Two helper columns are added through ``self._clean_and_convert_series``:

    * ``Measurement_num`` from the ``Measurement`` column (target type
      ``'numeric'``).
    * ``TestTime_dt`` from the ``Test Time`` column (target type
      ``'datetime'``).

    Args:
        test_data: Frame containing ``Measurement`` and ``Test Time``
            columns.

    Returns:
        The copied frame without the rows where either helper column is
        missing.  Row order is left as it was.
    """
    test_data = test_data.copy()

    # Convert the raw columns into plottable values.
    test_data['Measurement_num'] = self._clean_and_convert_series(
        test_data['Measurement'], 'numeric'
    )
    test_data['TestTime_dt'] = self._clean_and_convert_series(
        test_data['Test Time'], 'datetime'
    )

    # Discard rows that could not be converted.
    valid_data = test_data.dropna(subset=['Measurement_num', 'TestTime_dt'])
    return valid_data
def _calculate_statistics(self, y_data: pd.Series) -> Dict[str, float]:
"""计算统计信息"""
stats = {
@@ -1010,7 +1038,6 @@ class MultiFileTestReportScatterPlotter:
return sn_plots
def _determine_test_status(self, stats: Dict[str, float],
lower_limit: Optional[float],
upper_limit: Optional[float]) -> Dict[str, Any]:
@@ -1030,207 +1057,220 @@ class MultiFileTestReportScatterPlotter:
return {"status": status, "status_display": status_display}
def _create_comparison_plots(self, filtered_df: pd.DataFrame, keywords: List[str]) -> List[Dict[str, str]]:
"""创建多关键词对比图 - 优化版本:时间序列改为散点图"""
def _create_comparison_plots(self, filtered_df: pd.DataFrame, keyword_groups: Dict[str, List[str]]) -> List[
Dict[str, str]]:
"""创建汇总对比报告图表,按关键词和测试项分组显示"""
comparison_plots = []
if len(keywords) <= 1:
return comparison_plots # 单一关键词不需要对比图
if filtered_df.empty or "Test Name New" not in filtered_df.columns:
return comparison_plots
try:
# 确保有足够的数据
if filtered_df.empty:
return comparison_plots
# 预处理数据
processed_data = self._preprocess_test_data(filtered_df.copy())
# 1. 散点图对比(原时间序列图改为散点图)
fig1, ax1 = plt.subplots(figsize=(14, 8))
has_data = False
if processed_data.empty:
return comparison_plots
# 使用更鲜艳的颜色
colors = plt.cm.Set3(np.linspace(0, 1, len(keywords)))
markers = ['o', 's', '^', 'D', 'v', '<', '>', 'p', '*', 'h'] # 多种标记形状
# 1. 时间序列散点图 - 按关键词和测试项分别显示
num_tests_total = sum(len(tests) for tests in keyword_groups.values())
if num_tests_total > 0:
# 计算适当的子图布局
max_cols = 2
num_rows = (num_tests_total + max_cols - 1) // max_cols
# 获取全局时间范围用于统一x轴
global_min_time = None
global_max_time = None
fig1, axes = plt.subplots(num_rows, max_cols, figsize=(16, 6 * num_rows))
if num_rows == 1:
axes = axes.reshape(1, -1)
elif num_tests_total == 1:
axes = np.array([[axes]])
# 先收集所有数据的时间范围
for keyword in keywords:
keyword_mask = filtered_df["Test Name New"].astype(str).str.contains(
re.escape(keyword), case=False, na=False
)
keyword_data = filtered_df.loc[keyword_mask].copy()
axes = axes.flatten()
if not keyword_data.empty:
keyword_data = self._preprocess_test_data(keyword_data)
if not keyword_data.empty:
time_min = keyword_data['TestTime_dt'].min()
time_max = keyword_data['TestTime_dt'].max()
plot_idx = 0
colors = plt.cm.Set3(np.linspace(0, 1, 12))
if global_min_time is None or time_min < global_min_time:
global_min_time = time_min
if global_max_time is None or time_max > global_max_time:
global_max_time = time_max
for keyword, test_names in keyword_groups.items():
for test_name in test_names:
if plot_idx >= len(axes):
break
for i, keyword in enumerate(keywords):
keyword_mask = filtered_df["Test Name New"].astype(str).str.contains(
re.escape(keyword), case=False, na=False
)
keyword_data = filtered_df.loc[keyword_mask].copy()
# 筛选当前测试项的数据
test_data = processed_data[processed_data["Test Name New"] == test_name]
if not keyword_data.empty:
keyword_data = self._preprocess_test_data(keyword_data)
if not keyword_data.empty:
# 对散点图数据添加少量随机抖动,避免完全重叠
if len(keyword_data) > 1:
# 为相同时间点的数据添加微小的时间偏移,避免重叠
time_jitter = pd.Timedelta(minutes=1) # 1分钟抖动
jitter_range = np.random.uniform(-0.5, 0.5, len(keyword_data)) * time_jitter
keyword_data['TestTime_dt_jittered'] = keyword_data['TestTime_dt'] + jitter_range
x_values = keyword_data['TestTime_dt_jittered']
else:
x_values = keyword_data['TestTime_dt']
if test_data.empty:
continue
y_values = keyword_data['Measurement_num']
ax = axes[plot_idx]
if len(x_values) > 0:
# 使用散点图,设置不同的标记和透明度
scatter = ax1.scatter(x_values, y_values,
label=f"{keyword} (n={len(keyword_data)})",
color=colors[i],
marker=markers[i % len(markers)],
s=40, # 点的大小
alpha=0.7, # 透明度
edgecolors='white', # 边缘颜色
linewidth=0.5) # 边缘线宽
has_data = True
# 按SN分组绘制
sn_groups = test_data.groupby("SN")
color_idx = 0
# 为每个关键词添加趋势线(可选)
if len(keyword_data) >= 3:
try:
# 按时间排序
sorted_data = keyword_data.sort_values('TestTime_dt')
# 计算简单线性趋势
x_numeric = pd.to_numeric(sorted_data['TestTime_dt'])
y_trend = sorted_data['Measurement_num']
for sn, group in sn_groups:
ax.scatter(group['TestTime_dt'], group['Measurement_num'],
c=[colors[color_idx % len(colors)]], label=str(sn), alpha=0.7, s=30)
color_idx += 1
if len(x_numeric) >= 2:
# 使用numpy的polyfit计算趋势线
z = np.polyfit(x_numeric, y_trend, 1)
p = np.poly1d(z)
ax1.plot(sorted_data['TestTime_dt'], p(x_numeric),
color=colors[i], linestyle='--',
alpha=0.5, linewidth=1,
label=f"{keyword} 趋势线")
except:
pass # 趋势线计算失败时忽略
# 添加上下限
lower_plot, upper_plot, _, _ = self._extract_limits(test_data)
if lower_plot is not None:
ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=2, label="Lower Limit")
if upper_plot is not None:
ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=2, label="Upper Limit")
if has_data:
# 设置统一的x轴范围
if global_min_time and global_max_time:
# 添加一些边距
time_range = global_max_time - global_min_time
margin = time_range * 0.05
ax1.set_xlim(global_min_time - margin, global_max_time + margin)
ax.set_title(f"{test_name}\n(关键词: {keyword})", fontsize=10)
ax.set_xlabel("测试时间")
ax.set_ylabel("测量值")
ax.grid(True, alpha=0.3)
ax.tick_params(axis='x', rotation=45)
ax.legend(fontsize=8)
ax1.set_title("多关键词散点图对比", fontsize=14, fontweight='bold')
ax1.set_xlabel("测试时间", fontsize=12)
ax1.set_ylabel("测量值", fontsize=12)
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)
plot_idx += 1
# 优化图例显示
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left',
fontsize=10, framealpha=0.9)
plt.tight_layout()
# 隐藏多余的子图
for idx in range(plot_idx, len(axes)):
axes[idx].set_visible(False)
comparison_plots.append({
"title": "散点图对比",
"image": self._plot_to_base64(fig1)
})
plt.tight_layout()
comparison_plots.append({
"title": "时间序列散点图(按测试项分组)",
"image": self._plot_to_base64(fig1),
"description": "每个测试项单独显示按SN区分不同数据点"
})
# 2. 箱线图对比(简化版)
plot_data = []
labels = []
# 2. 箱线图 - 按关键词分组,每个关键词组内的测试项分别显示
if len(keyword_groups) > 0:
fig2, axes = plt.subplots(len(keyword_groups), 1, figsize=(14, 6 * len(keyword_groups)))
if len(keyword_groups) == 1:
axes = [axes]
for keyword in keywords:
keyword_mask = filtered_df["Test Name New"].astype(str).str.contains(
re.escape(keyword), case=False, na=False
)
keyword_data = filtered_df.loc[keyword_mask].copy()
for idx, (keyword, test_names) in enumerate(keyword_groups.items()):
ax = axes[idx]
if not keyword_data.empty:
keyword_data = self._preprocess_test_data(keyword_data)
if not keyword_data.empty and len(keyword_data) >= 3: # 至少3个数据点
plot_data.append(keyword_data['Measurement_num'].values)
labels.append(f"{keyword}\n(n={len(keyword_data)})")
boxplot_data = []
boxplot_labels = []
if len(plot_data) >= 2: # 至少两个关键词有数据
fig2, ax2 = plt.subplots(figsize=(10, 6))
bp = ax2.boxplot(plot_data, tick_labels=labels, patch_artist=True)
for test_name in test_names:
test_data = processed_data[processed_data["Test Name New"] == test_name]
if not test_data.empty:
boxplot_data.append(test_data['Measurement_num'].values)
boxplot_labels.append(test_name)
colors = plt.cm.Set3(np.linspace(0, 1, len(plot_data)))
for i, box in enumerate(bp['boxes']):
box.set(facecolor=colors[i], alpha=0.7)
if boxplot_data:
box_plot = ax.boxplot(boxplot_data, tick_labels=boxplot_labels, patch_artist=True)
ax2.set_title("多关键词箱线图对比", fontsize=14, fontweight='bold')
ax2.set_ylabel("测量值", fontsize=12)
ax2.grid(True, alpha=0.3)
plt.tight_layout()
# 设置箱线图颜色
colors_box = plt.cm.Set3(np.linspace(0, 1, len(boxplot_data)))
for patch, color in zip(box_plot['boxes'], colors_box):
patch.set_facecolor(color)
comparison_plots.append({
"title": "箱线图对比",
"image": self._plot_to_base64(fig2)
})
# 添加上下限线(使用第一个测试项的数据)
if boxplot_data:
first_test_data = processed_data[processed_data["Test Name New"] == test_names[0]]
lower_plot, upper_plot, _, _ = self._extract_limits(first_test_data)
if lower_plot is not None:
ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=2, label="Lower Limit")
if upper_plot is not None:
ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=2, label="Upper Limit")
# 3. 分布直方图对比
fig3, ax3 = plt.subplots(figsize=(12, 6))
has_hist_data = False
ax.set_title(f"箱线图 - {keyword}", fontsize=12)
ax.set_ylabel("测量值")
ax.tick_params(axis='x', rotation=45)
ax.grid(True, alpha=0.3)
ax.legend()
colors = plt.cm.Set3(np.linspace(0, 1, len(keywords)))
plt.tight_layout()
comparison_plots.append({
"title": "箱线图(按关键词分组)",
"image": self._plot_to_base64(fig2),
"description": "每个关键词组单独显示,组内测试项分别绘制箱线图"
})
for i, keyword in enumerate(keywords):
keyword_mask = filtered_df["Test Name New"].astype(str).str.contains(
re.escape(keyword), case=False, na=False
)
keyword_data = filtered_df.loc[keyword_mask].copy()
# 3. 概率分布直方图 - 按测试项分组显示
if num_tests_total > 0:
max_cols = 2
num_rows = (num_tests_total + max_cols - 1) // max_cols
if not keyword_data.empty:
keyword_data = self._preprocess_test_data(keyword_data)
if not keyword_data.empty:
# 动态调整直方图bins数量
n_bins = min(20, max(5, len(keyword_data) // 10))
ax3.hist(keyword_data['Measurement_num'].values,
bins=n_bins,
alpha=0.6,
label=f"{keyword} (n={len(keyword_data)})",
color=colors[i],
density=True) # 使用密度而不是频次
has_hist_data = True
fig3, axes = plt.subplots(num_rows, max_cols, figsize=(16, 6 * num_rows))
if num_rows == 1:
axes = axes.reshape(1, -1)
elif num_tests_total == 1:
axes = np.array([[axes]])
if has_hist_data:
ax3.set_title("多关键词分布直方图对比", fontsize=14, fontweight='bold')
ax3.set_xlabel("测量值", fontsize=12)
ax3.set_ylabel("密度", fontsize=12)
ax3.legend()
ax3.grid(True, alpha=0.3)
plt.tight_layout()
axes = axes.flatten()
comparison_plots.append({
"title": "分布直方图对比",
"image": self._plot_to_base64(fig3)
})
plot_idx = 0
colors_hist = plt.cm.Set3(np.linspace(0, 1, 12))
except Exception as e:
self._print_warning(f"创建对比图时出错: {e}")
import traceback
traceback.print_exc()
for keyword, test_names in keyword_groups.items():
for test_name in test_names:
if plot_idx >= len(axes):
break
test_data = processed_data[processed_data["Test Name New"] == test_name]
if test_data.empty or len(test_data) < 2:
continue
ax = axes[plot_idx]
# 绘制概率分布直方图
sns.histplot(test_data['Measurement_num'], kde=True,
color=colors_hist[plot_idx % len(colors_hist)], alpha=0.7, ax=ax)
# 添加上下限线
lower_plot, upper_plot, _, _ = self._extract_limits(test_data)
if lower_plot is not None:
ax.axvline(x=lower_plot, color='green', linestyle='--', linewidth=2, label="Lower Limit")
if upper_plot is not None:
ax.axvline(x=upper_plot, color='red', linestyle='--', linewidth=2, label="Upper Limit")
ax.set_title(f"{test_name}\n(关键词: {keyword})", fontsize=10)
ax.set_xlabel("测量值")
ax.set_ylabel("频率")
ax.grid(True, alpha=0.3)
ax.legend()
plot_idx += 1
# 隐藏多余的子图
for idx in range(plot_idx, len(axes)):
axes[idx].set_visible(False)
plt.tight_layout()
comparison_plots.append({
"title": "概率分布直方图(按测试项分组)",
"image": self._plot_to_base64(fig3),
"description": "每个测试项单独显示概率分布和上下限"
})
return comparison_plots
def generate_html_report(self, filtered_df: pd.DataFrame, keywords: List[str],
unique_tests: List[str]) -> None:
"""生成HTML报告 - 修复对比图显示问题"""
def _organize_tests_by_keyword_groups(self, test_results: List[Dict[str, Any]],
keyword_groups: Dict[str, List[str]]) -> Dict[
str, Dict[str, List[Dict[str, Any]]]]:
"""按关键词分组组织测试结果,每个测试项单独显示"""
organized_groups = {}
# 构建测试名称到测试结果的映射
test_name_to_result = {test['name']: test for test in test_results}
# 为每个关键词组分配测试结果
for keyword, test_names in keyword_groups.items():
group_tests = {}
for test_name in test_names:
if test_name in test_name_to_result:
# 每个测试项单独作为一个分组
group_tests[test_name] = [test_name_to_result[test_name]]
if group_tests:
organized_groups[keyword] = group_tests
return organized_groups
def generate_html_report(self, filtered_df: pd.DataFrame, keyword: str,
unique_tests: List[str], keyword_groups: Dict[str, List[str]]) -> None:
"""生成HTML报告"""
self._print_stage("生成HTML报告")
start_time = time.time()
@@ -1238,16 +1278,6 @@ class MultiFileTestReportScatterPlotter:
total_points = 0
status_counts = {"success": 0, "warning": 0, "danger": 0}
# 生成多关键词对比图
print(f"🔍 调试: 开始生成对比图,关键词数量: {len(keywords)}")
comparison_plot_images = self._create_comparison_plots(filtered_df, keywords)
print(f"🔍 调试: 对比图生成完成,数量: {len(comparison_plot_images)}")
# 调试输出对比图信息
for i, plot in enumerate(comparison_plot_images):
print(f" - 对比图{i + 1}: {plot['title']}, 图像大小: {len(plot['image'])} 字符")
# 生成各测试项的详细图表
for i, test_name in enumerate(unique_tests, 1):
self._print_progress(i, len(unique_tests), "生成测试报告")
@@ -1287,14 +1317,16 @@ class MultiFileTestReportScatterPlotter:
"status_display": status_info["status_display"]
})
# 渲染HTML模板前再次验证数据
print(f"🔍 调试: 传递给模板的对比图数量: {len(comparison_plot_images)}")
# 按关键词分组组织测试结果,每个测试项单独显示
organized_keyword_groups = self._organize_tests_by_keyword_groups(test_results, keyword_groups)
# 生成汇总对比报告图表
comparison_plots = self._create_comparison_plots(filtered_df, keyword_groups)
# 渲染HTML模板
template = Template(HTML_TEMPLATE)
html_content = template.render(
keywords=keywords if keywords else ["所有数据"],
keywords_display=", ".join(keywords) if keywords else "所有数据",
keyword=keyword if keyword else "所有数据",
timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
test_count=len(test_results),
total_points=total_points,
@@ -1306,15 +1338,17 @@ class MultiFileTestReportScatterPlotter:
file_count=len(self.file_infos),
file_infos=self.file_infos,
total_rows=len(self.df) if self.df is not None else 0,
comparison_plot_images=comparison_plot_images # 确保传递
comparison_plots=comparison_plots,
keyword_groups=organized_keyword_groups
)
# 调试检查生成的HTML内容
if comparison_plot_images:
if "comparison_plot_images" in html_content or "时间序列对比" in html_content:
print("对比图已成功嵌入HTML")
if comparison_plots:
if "comparison_plots" in html_content or "时间序列散点" in html_content:
self._print_success(f"对比图已成功嵌入HTML")
else:
print("❌ 对比图未正确嵌入HTML")
# print("❌ 对比图未正确嵌入HTML")
self._print_error("对比图未正确嵌入HTML")
# 保存HTML文件
with open(self.html_report_path, 'w', encoding='utf-8') as f:
@@ -1323,8 +1357,8 @@ class MultiFileTestReportScatterPlotter:
self._print_success(f"HTML报告已生成: {self.html_report_path}")
self._print_success(
f"共处理 {len(self.file_infos)} 个文件,{len(test_results)} 个测试项,{total_points} 个数据点")
if len(keywords) > 1:
self._print_success(f"已生成 {len(comparison_plot_images)} 个对比图表")
if len(keyword_groups) > 1:
self._print_success(f"已生成 {len(comparison_plots)} 个对比图表{len(keyword_groups)}个关键词:{", ".join(keyword_groups)}")
def run(self) -> None:
"""运行主程序"""
@@ -1340,20 +1374,20 @@ class MultiFileTestReportScatterPlotter:
self.load_multiple_files_optimized(excel_files)
while True:
filtered_df, keywords, unique_tests = self.get_keywords() # 修改方法调用
# 修改为使用多关键词输入方法,返回关键词分组信息
filtered_df, keyword, unique_tests, keyword_groups = self.get_keywords()
if filtered_df.empty:
self._print_warning("没有数据可处理,退出程序")
break
safe_keyword_text = "_".join([self._safe_filename(k) for k in keywords]) if keywords else "all_data"
self.create_output_dir(safe_keyword_text)
self.generate_html_report(filtered_df, keywords, unique_tests) # 修改参数
self.create_output_dir(keyword)
self.generate_html_report(filtered_df, keyword, unique_tests, keyword_groups)
self._print_success("分析完成!")
print(f"📊 报告文件: {self.html_report_path}")
print(f"📁 输出目录: {self.output_dir}")
if len(keywords) > 1:
print(f"🔍 对比关键词: {', '.join(keywords)}")
if len(keyword_groups) > 1:
print(f"🔍 对比关键词/组: {', '.join(keyword_groups)}")
# 询问是否继续分析其他关键词
continue_choice = input("\n是否继续分析其他关键词?(y/n): ").strip().lower()