@@ -4,6 +4,7 @@ import sys
import time
import pandas as pd
import matplotlib . pyplot as plt
import seaborn as sns
from datetime import datetime
from matplotlib . lines import Line2D
from typing import Optional , Tuple , List , Dict , Any , Union
@@ -36,13 +37,14 @@ OPTIMIZATION_CONFIG = {
' memory_limit_gb ' : psutil . virtual_memory ( ) . available / / ( 1024 * * 3 ) * 0.7 , # 内存限制
}
# HTML模板 - 增加汇总对比报告部分
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang= " zh-CN " >
<head>
<meta charset= " UTF-8 " >
<meta name= " viewport " content= " width=device-width, initial-scale=1.0 " >
<title>测试报告分析 - {{ keywords_display }}</title>
<title>测试报告分析 - {{ keyword }}</title>
<style>
/* 样式保持不变,根据需要调整 */
body {
@@ -116,22 +118,20 @@ HTML_TEMPLATE = """
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
}
.compariso n-plots-container {
.s n-plots-container {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(5 00px, 1fr));
grid-template-columns: repeat(auto-fit, minmax(4 00px, 1fr));
gap: 20px;
margin: 20px 0;
}
.compariso n-plot-item {
background: white ;
border: 1px solid #e0e0e0;
border-radius: 8px;
.s n-plot-item {
background: #f8f9fa ;
padding: 15px;
border-radius: 8px;
text-align: center;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
.compariso n-plot-title {
font-size: 16 px;
.s n-plot-title {
font-size: 14 px;
font-weight: bold;
margin-bottom: 10px;
color: #555;
@@ -187,27 +187,48 @@ HTML_TEMPLATE = """
border-radius: 6px;
margin: 10px 0;
}
.keyword-tag {
display: inline-block ;
background: #e1f5fe ;
color: #01579b ;
padding: 4px 8 px;
border-radius: 4px;
margin: 2px;
font-size: 12px ;
.comparison-section {
background: white ;
border-radius: 10px ;
padding: 20px ;
margin-top: 30 px;
}
.comparison-plots {
display: grid ;
grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
gap: 20px;
margin: 20px 0;
}
.keyword-group {
background: #f0f8ff;
border-left: 4px solid #4169e1;
padding: 15px;
margin: 20px 0;
border-radius: 8px;
}
.keyword-title {
font-size: 18px;
font-weight: bold;
color: #4169e1;
margin-bottom: 10px;
}
.sub-test-card {
background: #f8f9fa;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
border-left: 3px solid #667eea;
}
</style>
</head>
<body>
<div class= " header " >
<h1>📊 测试报告分析 (多文件合并)</h1>
<p>关键词:
{ % f or keyword in keywords % }
<span class= " keyword-tag " > {{ keyword }}</span>
{ % e ndfor % }
</p>
<p>生成时间: {{ timestamp }}</p>
<p>关键词: <strong> {{ keyword }}</strong> | 生成时间: {{ timestamp }}</p>
<p>共分析 {{ test_count }} 个测试项, {{ total_points }} 个数据点,来自 {{ file_count }} 个文件</p>
{ % i f keyword_groups|length > 1 % }
<p>关键词分组: {{ keyword_groups|length }} 组</p>
{ % e ndif % }
</div>
<div class= " summary " >
@@ -221,98 +242,114 @@ HTML_TEMPLATE = """
{ % e ndfor % }
</div>
<!-- 多 关键词对比图 - 移动到更显眼的位置 -->
{ % i f comparison_plot_images and comparison_plot_images|length > 0 % }
<div class= " test-card " >
<div class= " section -title" >🔍 多 关键词对比视图 </div>
<div class= " comparison-plots-container " >
{ % f or comparison_plot in comparison_plot_images % }
<div class= " comparison-plot-item " >
<div class=" comparison-plot-title " > {{ comparison_plot.title }}</div >
<img src= " data:image/png;base64, {{ comparison_plot.image }} "
alt= " {{ comparison_plot.title }} "
class= " plot-image "
style= " max-width: 100 % ; height: auto; " >
<!-- 按 关键词分组显示测试项 -->
{ % f or group_name, group_tests in keyword_groups.items() % }
<div class= " keyword-group " >
<div class= " keyword -title" >🔍 关键词组: {{ group_name }} </div>
<!-- 显示该关键词组下的所有测试项 -->
{ % f or test_name, test_list in group_tests.items() % }
<div class= " sub-test-card " >
<div class= " test-header " >
<div class= " test-title " >📋 {{ test_name }}</div>
{ % i f test_list[0].status % }
<div class= " badge badge- {{ test_list[0].status }} " >
{{ test_list[0].status_display }}
</div>
{ % e ndif % }
</div>
{ % f or test in test_list % }
<div class= " test-stats " >
<div class= " stat-item " >
<div class= " stat-label " >数据点数</div>
<div class= " stat-value " > {{ test.stats.count }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >平均值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.mean) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >中位数</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.median) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >标准差</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.std) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >最小值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.min) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >最大值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.max) }}</div>
</div>
</div>
{ % i f test.limits.lower is not none or test.limits.upper is not none % }
<div class= " test-stats " >
{ % i f test.limits.lower is not none % }
<div class= " stat-item " >
<div class= " stat-label " >下限值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.limits.lower) }}</div>
</div>
{ % e ndif % }
{ % i f test.limits.upper is not none % }
<div class= " stat-item " >
<div class= " stat-label " >上限值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.limits.upper) }}</div>
</div>
{ % e ndif % }
</div>
{ % e ndif % }
<!-- 汇总图 -->
<div class= " section-title " >📈 汇总视图 (所有SN)</div>
<div class= " plot-container " >
<img src= " data:image/png;base64, {{ test.summary_plot_image }} " alt= " {{ test.name }} 汇总散点图 " class= " plot-image " >
</div>
<!-- SN独立图 -->
{ % i f test.sn_plot_images % }
<div class= " section-title " >🔍 SN独立视图 ( {{ test.sn_plot_images|length }}个SN)</div>
<div class= " sn-plots-container " >
{ % f or sn_plot in test.sn_plot_images % }
<div class= " sn-plot-item " >
<div class= " sn-plot-title " >SN: {{ sn_plot.sn }}</div>
<img src= " data:image/png;base64, {{ sn_plot.image }} " alt= " {{ test.name }} - SN {{ sn_plot.sn }} 散点图 " class= " plot-image " >
</div>
{ % e ndfor % }
</div>
{ % e ndif % }
{ % i f not loop.last % }
<hr style= " margin: 20px 0; border: none; border-top: 1px solid #e0e0e0; " >
{ % e ndif % }
{ % e ndfor % }
</div>
{ % e ndfor % }
</div>
{ % e ndfor % }
<!-- 汇总对比报告 -->
{ % i f comparison_plots % }
<div class= " comparison-section " >
<h2>📊 汇总对比报告</h2>
<div class= " comparison-plots " >
{ % f or plot in comparison_plots % }
<div class= " plot-container " >
<h3> {{ plot.title }}</h3>
<img src= " data:image/png;base64, {{ plot.image }} " alt= " {{ plot.title }} " class= " plot-image " >
{ % i f plot.description % }
<p> {{ plot.description }}</p>
{ % e ndif % }
</div>
{ % e ndfor % }
</div>
</div>
{ % e ndif % }
{ % f or test in tests % }
<div class= " test-card " >
<div class= " test-header " >
<div class= " test-title " >📋 {{ test.name }}</div>
<div class= " badge badge- {{ test.status }} " >
{{ test.status_display }}
</div>
</div>
<div class= " test-stats " >
<div class= " stat-item " >
<div class= " stat-label " >数据点数</div>
<div class= " stat-value " > {{ test.stats.count }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >平均值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.mean) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >中位数</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.median) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >标准差</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.std) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >最小值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.min) }}</div>
</div>
<div class= " stat-item " >
<div class= " stat-label " >最大值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.stats.max) }}</div>
</div>
</div>
{ % i f test.limits.lower is not none or test.limits.upper is not none % }
<div class= " test-stats " >
{ % i f test.limits.lower is not none % }
<div class= " stat-item " >
<div class= " stat-label " >下限值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.limits.lower) }}</div>
</div>
{ % e ndif % }
{ % i f test.limits.upper is not none % }
<div class= " stat-item " >
<div class= " stat-label " >上限值</div>
<div class= " stat-value " > {{ " %.4f " |format(test.limits.upper) }}</div>
</div>
{ % e ndif % }
</div>
{ % e ndif % }
<!-- 汇总图 -->
<div class= " section-title " >📈 汇总视图 (所有SN)</div>
<div class= " plot-container " >
<img src= " data:image/png;base64, {{ test.summary_plot_image }} " alt= " {{ test.name }} 汇总散点图 " class= " plot-image " >
</div>
<!-- SN独立图 -->
{ % i f test.sn_plot_images % }
<div class= " section-title " >🔍 SN独立视图 ( {{ test.sn_plot_images|length }}个SN)</div>
<div class= " comparison-plots-container " >
{ % f or sn_plot in test.sn_plot_images % }
<div class= " comparison-plot-item " >
<div class= " comparison-plot-title " >SN: {{ sn_plot.sn }}</div>
<img src= " data:image/png;base64, {{ sn_plot.image }} " alt= " {{ test.name }} - SN {{ sn_plot.sn }} 散点图 " class= " plot-image " >
</div>
{ % e ndfor % }
</div>
{ % e ndif % }
</div>
{ % e ndfor % }
<div class= " summary " >
<h3>📈 分析摘要</h3>
<div class= " summary-item " >
@@ -322,10 +359,12 @@ HTML_TEMPLATE = """
<strong>分析时间:</strong> {{ analysis_time }}秒
</div>
<div class= " summary-item " >
<strong>关键词:</strong>
{ % f or keyword in keywords % }
<span class= " keyword-tag " > {{ keyword }}</span>
{ % e ndfor % }
<strong>关键词分组 :</strong> {{ keyword_groups|length }} 组
<ul>
{ % f or group_name, group_tests in keyword_groups.items() % }
<li> {{ group_name }}: {{ group_tests|length }} 个测试项</li>
{ % e ndfor % }
</ul>
</div>
<div class= " summary-item " >
<strong>测试项分布:</strong>
@@ -363,6 +402,7 @@ class MultiFileTestReportScatterPlotter:
self . col_lower : Optional [ str ] = None
self . col_upper : Optional [ str ] = None
self . html_report_path : Optional [ str ] = None
self . comparison_report_path : Optional [ str ] = None
self . file_infos : List [ Dict [ str , Any ] ] = [ ]
# 缓存处理过的数据
@@ -517,8 +557,6 @@ class MultiFileTestReportScatterPlotter:
self . _print_error ( f " 加载文件失败 { filename } : { e } " )
return None
def _find_column_case_insensitive ( self , candidates : List [ str ] ) - > Optional [ str ] :
""" 优化的大小写不敏感列查找 """
if self . df is None :
@@ -608,7 +646,7 @@ class MultiFileTestReportScatterPlotter:
else :
# 顺序加载
for i , file_info in enumerate ( file_infos , 1 ) :
self . _print_progress ( i , len ( excel_file s) , " 加载文件 " )
self . _print_progress ( i , len ( file_info s) , " 加载文件 " )
df = self . _load_single_file_optimized ( file_info )
if df is not None :
all_dataframes . append ( df )
@@ -649,47 +687,50 @@ class MultiFileTestReportScatterPlotter:
self . col_upper = self . _find_column_case_insensitive ( [
" Upper Limit " , " upper limit " , " upper_limit " , " ul " , " upper "
] )
def get_keywords ( self ) - > Tuple [ pd . DataFrame , List [ str ] , List [ str ] ] :
""" 获取用户输入的关键词并筛选数据 - 修改为支持多个关键词 """
def get_keywords ( self ) - > Tuple [ pd . DataFrame , str , List [ str ] , Dict [ str , List [ str ] ] ] :
""" 获取用户输入的多个关键词并筛选数据,返回关键词分组信息 """
self . _print_stage ( " 筛选关键词 " )
while True :
keyword_input = input ( " 请输入一个或多个关键词(用逗号分隔 ,匹配 ' Test Name New ' ): " ) . strip ( )
keyword_input = input ( " 请输入筛选关键词(多个关键词用 ' , ' 分割 ,匹配 ' Test Name New ' ): " ) . strip ( )
if not keyword_input :
print ( " ❌ 关键词不能为空,请重新输入 " )
continue
# 分割关键词
keywords = [ k . strip ( ) for k in keyword_input . split ( ' , ' ) if k . strip ( ) ]
if not keywords :
print ( " ❌ 没有有效的关键词,请重新输入 " )
continue
# 检查数据框是否为空
if self . df is None or self . df . empty :
if self . df . empty :
print ( " ⚠️ 数据框为空,无法进行筛选 " )
return pd . DataFrame ( ) , keywords , [ ]
return pd . DataFrame ( ) , keyword_input , [ ] , { }
# 检查列是否存在
if " Test Name New " not in self . df . columns :
print ( " ❌ 列 ' Test Name New ' 不存在于数据框中 " )
print ( f " 可用列: { list ( self . df . columns ) } " )
return pd . DataFrame ( ) , keywords , [ ]
return pd . DataFrame ( ) , keyword_input , [ ] , { }
try :
# 创建多个关键词的筛选条件
# 分割关键词
keywords = [ k . strip ( ) for k in keyword_input . split ( ' , ' ) if k . strip ( ) ]
# 构建多个关键词的筛选条件
mask = pd . Series ( False , index = self . df . index )
keyword_groups = { }
for keyword in keywords :
keyword_mask = self . df [ " Test Name New " ] . astype ( str ) . str . contains ( keyword , case = False , na = False )
mask = mask | keyword_mask
# 记录每个关键词匹配的测试项
matched_tests = self . df . loc [ keyword_mask , " Test Name New " ] . unique ( ) . tolist ( )
keyword_groups [ keyword ] = matched_tests
filtered_df = self . df . loc [ mask ] . copy ( )
if filtered_df . empty :
# 提供友好的提示和建议
print ( f " ⚠️ 没有找到包含关键词 ' { ' , ' . join ( keywords ) } ' 的测试项 " )
print ( f " ⚠️ 没有找到包含关键词 ' { keyword_input } ' 的测试项 " )
# 显示部分可用的测试项作为参考
available_tests = self . df [ " Test Name New " ] . dropna ( ) . unique ( )
@@ -707,16 +748,21 @@ class MultiFileTestReportScatterPlotter:
elif choice == " 2 " :
filtered_df = self . df . copy ( )
unique_tests = filtered_df [ " Test Name New " ] . unique ( ) . tolist ( )
# 为所有数据创建一个默认分组
keyword_groups = { " 所有测试项 " : unique_tests }
print ( f " ✅ 使用所有数据: { len ( filtered_df ) } 行, { len ( unique_tests ) } 个测试项 " )
return filtered_df , keywords , unique_tests
return filtered_df , " " , unique_tests , keyword_groups
else :
print ( " 👋 退出筛选操作 " )
return pd . DataFrame ( ) , keywords , [ ]
return pd . DataFrame ( ) , keyword_input , [ ] , { }
else :
unique_tests = filtered_df [ " Test Name New " ] . unique ( ) . tolist ( )
print ( f " ✅ 匹配到 { len ( filtered_df ) } 行数据,涉及 { len ( unique_tests ) } 个不同测试项 " )
print ( f " 使用的 关键词: { ' , ' . joi n( keywords ) } " )
return filtered_df , keywords , unique_tests
print ( f " 📊 关键词分组 : { le n( keyword_group s ) } 组 ")
for keyword , tests in keyword_groups . items ( ) :
print ( f " - ' { keyword } ' : { len ( tests ) } 个测试项 " )
return filtered_df , keyword_input , unique_tests , keyword_groups
except Exception as e :
print ( f " ❌ 筛选过程中发生错误: { e } " )
@@ -735,14 +781,17 @@ class MultiFileTestReportScatterPlotter:
safe_keyword = self . _safe_filename ( keyword ) if keyword else " all_data "
self . html_report_path = os . path . join ( self . output_dir , f " { safe_keyword } _report_ { timestamp } .html " )
# 为汇总对比报告创建单独的文件路径
self . comparison_report_path = os . path . join ( self . output_dir ,
f " { safe_keyword } _comparison_report_ { timestamp } .html " )
os . makedirs ( self . output_dir , exist_ok = True )
print ( f " 输出目录: { self . output_dir } " )
@staticmethod
def _safe_filename ( name : str ) - > str :
""" 生成安全的文件名 """
safe = " " . join ( c for c in str ( name ) if c . isalnum ( ) or c in ( " " , " _ " , " - " ) ) . strip ( )
safe = " " . join ( c for c in str ( name ) if c . isalnum ( ) or c in ( " , " , " " , " _ " , " - " ) ) . strip ( )
return safe or " Unknown_Test "
def _extract_limits ( self , df_one_test : pd . DataFrame ) - > Tuple [
@@ -766,10 +815,9 @@ class MultiFileTestReportScatterPlotter:
return lower_plot , upper_plot , lower_set , upper_set
@staticmethod
def _clean_and_convert_series ( series : pd . Series , target_type : str = ' numeric ' ) - > pd . Series :
""" 统一的系列清洗和转换方法 - 修复了 ast 方法名错误 """
""" 统一的系列清洗和转换方法 """
if series . empty :
return series
@@ -778,7 +826,7 @@ class MultiFileTestReportScatterPlotter:
if pd . api . types . is_numeric_dtype ( series ) :
return series . astype ( float )
# 批量字符串处理 - 修复这里的问题
# 批量字符串处理
cleaned = series . astype ( str ) . str . replace ( r ' [, ] ' , ' ' , regex = True ) . str . strip ( )
return pd . to_numeric ( cleaned , errors = ' coerce ' )
@@ -787,7 +835,6 @@ class MultiFileTestReportScatterPlotter:
return series
@staticmethod
def _convert_to_datetime ( series : pd . Series ) - > pd . Series :
""" 优化的日期时间转换 """
@@ -877,25 +924,6 @@ class MultiFileTestReportScatterPlotter:
valid_data = test_data . dropna ( subset = [ ' Measurement_num ' , ' TestTime_dt ' ] )
return valid_data . sort_values ( ' TestTime_dt ' )
def _preprocess_data ( self , test_data : pd . DataFrame ) - > pd . DataFrame :
""" 数据预处理 - 简化版本用于对比图 """
test_data = test_data . copy ( )
# 数值转换
test_data [ ' Measurement_num ' ] = self . _clean_and_convert_series (
test_data [ ' Measurement ' ] , ' numeric '
)
test_data [ ' TestTime_dt ' ] = self . _clean_and_convert_series (
test_data [ ' Test Time ' ] , ' datetime '
)
# 去除无效数据
valid_data = test_data . dropna ( subset = [ ' Measurement_num ' , ' TestTime_dt ' ] )
return valid_data
def _calculate_statistics ( self , y_data : pd . Series ) - > Dict [ str , float ] :
""" 计算统计信息 """
stats = {
@@ -1010,7 +1038,6 @@ class MultiFileTestReportScatterPlotter:
return sn_plots
def _determine_test_status ( self , stats : Dict [ str , float ] ,
lower_limit : Optional [ float ] ,
upper_limit : Optional [ float ] ) - > Dict [ str , Any ] :
@@ -1030,207 +1057,220 @@ class MultiFileTestReportScatterPlotter:
return { " status " : status , " status_display " : status_display }
def _create_comparison_plots ( self , filtered_df : pd . DataFrame , keywords : Lis t[ str ] ) - > List [Dict [ str , str ] ] :
""" 创建多关键词对比图 - 优化版本:时间序列改为散点图 """
def _create_comparison_plots ( self , filtered_df : pd . DataFrame , keyword_group s : Dic t[ str , List [ str ] ]) - > List [
Dict [ str , str ] ] :
""" 创建汇总对比报告图表,按关键词和测试项分组显示 """
comparison_plots = [ ]
if len ( keywords ) < = 1 :
return comparison_plots # 单一关键词不需要对比图
if filtered_df . empty or " Test Name New " not in filtered_df . columns :
return comparison_plots
try :
# 确保有足够的数据
if filtered_df . empty :
return comparison_plots
# 预处理数据
processed_data = self . _preprocess_test_data ( filtered_df . copy ( ) )
# 1. 散点图对比(原时间序列图改为散点图)
fig1 , ax1 = plt . subplots ( figsize = ( 14 , 8 ) )
has_data = False
if processed_data . empty :
return comparison_plots
# 使用更鲜艳的颜色
colors = plt . cm . Set3 ( np . linspace ( 0 , 1 , len ( keyword s) ) )
markers = [ ' o ' , ' s ' , ' ^ ' , ' D ' , ' v ' , ' < ' , ' > ' , ' p ' , ' * ' , ' h ' ] # 多种标记形状
# 1. 时间序列散点图 - 按关键词和测试项分别显示
num_tests_total = sum ( len ( tests ) for tests in keyword_groups . value s( ) )
if num_tests_total > 0 :
# 计算适当的子图布局
max_cols = 2
num_rows = ( num_tests_total + max_cols - 1 ) / / max_cols
# 获取全局时间范围用于统一x轴
global_min_time = None
global_max_time = None
fig1 , axes = plt . subplots ( num_rows , max_cols , figsize = ( 16 , 6 * num_rows ) )
if num_rows == 1 :
axes = axes . reshape ( 1 , - 1 )
elif num_tests_total == 1 :
axes = np . array ( [ [ axes ] ] )
# 先收集所有数据的时间范围
for keyword in keywords :
keyword_mask = filtered_df [ " Test Name New " ] . astype ( str ) . str . contains (
re . escape ( keyword ) , case = False , na = False
)
keyword_data = filtered_df . loc [ keyword_mask ] . copy ( )
axes = axes . flatten ( )
if not keyword_data . empty :
keyword_data = self . _preprocess_test_data ( keyword_data )
if not keyword_data . empty :
time_min = keyword_data [ ' TestTime_dt ' ] . min ( )
time_max = keyword_data [ ' TestTime_dt ' ] . max ( )
plot_idx = 0
colors = plt . cm . Set3 ( np . linspace ( 0 , 1 , 12 ) )
if global_min_ti me is None or time_min < global_min_time :
global_min_ti me = time_min
if g lobal_max_time is None or time_max > global_max_time :
global_max_time = time_max
for keyword , test_na mes in keyword_groups . items ( ) :
for test_na me in test_names :
if p lot_idx > = len ( axes ) :
break
for i , keyword in enumerate ( keywords ) :
keyword_mask = filter ed_df [ " Test Name New " ] . astype ( str ) . str . contains (
re . escape ( keyword ) , case = False , na = False
)
keyword_data = filtered_df . loc [ keyword_mask ] . copy ( )
# 筛选当前测试项的数据
test_data = processed_data [ process ed_data [ " Test Name New " ] == test_name ]
if not keyword _data. empty :
keyword_data = self . _preprocess_test_data ( keyword_data )
if not keyword_data . empty :
# 对散点图数据添加少量随机抖动,避免完全重叠
if len ( keyword_data ) > 1 :
# 为相同时间点的数据添加微小的时间偏移,避免重叠
time_jitter = pd . Timedelta ( minutes = 1 ) # 1分钟抖动
jitter_range = np . random . uniform ( - 0.5 , 0.5 , len ( keyword_data ) ) * time_jitter
keyword_data [ ' TestTime_dt_jittered ' ] = keyword_data [ ' TestTime_dt ' ] + jitter_range
x_values = keyword_data [ ' TestTime_dt_jittered ' ]
else :
x_values = keyword_data [ ' TestTime_dt ' ]
if test _data. empty :
continue
y_values = keyword_data [ ' Measurement_num ' ]
ax = axes [ plot_idx ]
if len ( x_values ) > 0 :
# 使用散点图,设置不同的标记和透明度
scatter = ax1 . scatter ( x_values , y_values ,
label = f " { keyword } (n= { len ( keyword_data ) } ) " ,
color = colors [ i ] ,
marker = markers [ i % len ( markers ) ] ,
s = 40 , # 点的大小
alpha = 0.7 , # 透明度
edgecolors = ' white ' , # 边缘颜色
linewidth = 0.5 ) # 边缘线宽
has_data = True
# 按SN分组绘制
sn_groups = test_data . groupby ( " SN " )
color_idx = 0
# 为每个关键词添加趋势线(可选)
if len ( keyword_data ) > = 3 :
try :
# 按时间排序
sorted_data = keyword_data . sort_values ( ' TestTime_dt ' )
# 计算简单线性趋势
x_numeric = pd . to_numeric ( sorted_data [ ' TestTime_dt ' ] )
y_trend = sorted_data [ ' Measurement_num ' ]
for sn , group in sn_groups :
ax . scatter ( group [ ' TestTime_dt ' ] , group [ ' Measurement_num ' ] ,
c = [ colors [ color_idx % len ( colors ) ] ] , label = str ( sn ) , alpha = 0.7 , s = 30 )
color_idx + = 1
if len ( x_numeric ) > = 2 :
# 使用numpy的polyfit计算趋势线
z = np . polyfit ( x_numeric , y_trend , 1 )
p = np . poly1d ( z )
ax1 . plot ( sorted_data [ ' TestTime_dt ' ] , p ( x_numeric ) ,
color = colors [ i ] , linestyle = ' -- ' ,
alpha = 0.5 , linewidth = 1 ,
label = f " { keyword } 趋势线 " )
except :
pass # 趋势线计算失败时忽略
# 添加上下限
lower_plot , upper_plot , _ , _ = self . _extract_limits ( test_data )
if lower_plot is not None :
ax . axhline ( y = lower_plot , color = ' green ' , linestyle = ' -- ' , linewidth = 2 , label = " Lower Limit " )
if upper_plot is not None :
ax . axhline ( y = upper_plot , color = ' red ' , linestyle = ' -- ' , linewidth = 2 , label = " Upper Limit " )
if has_data :
# 设置统一的x轴范围
if global_min_time and global_max_time :
# 添加一些边距
time_range = global_max_time - global_min_time
margin = time_range * 0.05
ax1 . set_xlim ( global_min_time - margin , global_max_time + margin )
ax . set_title ( f " { test_name } \n (关键词: { keyword } ) " , fontsize = 10 )
ax . set_xlabel ( " 测试时间 " )
ax . set_ylabel ( " 测量值 " )
ax . grid ( True , alpha = 0.3 )
ax . tick_params ( axis = ' x ' , rotation = 45 )
ax . legend ( fontsize = 8 )
ax1 . set_title ( " 多关键词散点图对比 " , fontsize = 14 , fontweight = ' bold ' )
ax1 . set_xlabel ( " 测试时间 " , fontsize = 12 )
ax1 . set_ylabel ( " 测量值 " , fontsize = 12 )
ax1 . grid ( True , alpha = 0.3 )
ax1 . tick_params ( axis = ' x ' , rotation = 45 )
plot_idx + = 1
# 优化图例显示
ax1 . legend ( bbox_to_anchor = ( 1.05 , 1 ) , loc = ' upper left ' ,
fontsize = 10 , framealpha = 0.9 )
plt . tight_layout ( )
# 隐藏多余的子图
for idx in range ( plot_idx , len ( axes ) ) :
axes [ idx ] . set_visible ( False )
comparison_plots . append ( {
" title " : " 散点图对比 " ,
" imag e" : self . _plot_to_base64 ( fig1 )
} )
plt . tight_layout ( )
comparison_plots . append ( {
" titl e" : " 时间序列散点图(按测试项分组) " ,
" image " : self . _plot_to_base64 ( fig1 ) ,
" description " : " 每个测试项单独显示, 按SN区分不同数据点 "
} )
# 2. 箱线图对比(简化版)
plot_data = [ ]
labels = [ ]
# 2. 箱线图 - 按关键词分组,每个关键词组内的测试项分别显示
if len ( keyword_groups ) > 0 :
fig2 , axes = plt . subplots ( len ( keyword_groups ) , 1 , figsize = ( 14 , 6 * len ( keyword_groups ) ) )
if len ( keyword_groups ) == 1 :
axes = [ axes ]
for keyword in keywords :
keyword_mask = filtered_df [ " Test Name New " ] . astype ( str ) . str . contains (
re . escape ( keyword ) , case = False , na = False
)
keyword_data = filtered_df . loc [ keyword_mask ] . copy ( )
for idx , ( keyword , test_names ) in enumerate ( keyword_groups . items ( ) ) :
ax = axes [ idx ]
if not keyword_data . empty :
keyword_data = self . _preprocess_test_data ( keyword_data )
if not keyword_data . empty and len ( keyword_data ) > = 3 : # 至少3个数据点
plot_data . append ( keyword_data [ ' Measurement_num ' ] . values )
labels . append ( f " { keyword } \n (n= { len ( keyword_data ) } ) " )
boxplot_data = [ ]
boxplot_labels = [ ]
if len ( plot_data ) > = 2 : # 至少两个关键词有数据
fig2 , ax2 = plt . subplots ( figsize = ( 10 , 6 ) )
bp = ax2 . boxplot ( plot_data , tick_labels = labels , patch_artist = True )
for test_name in test_names :
test_data = processed_data [ processed_data [ " Test Name New " ] == test_name ]
if not test_data . empty :
boxplot_data . append ( test_data [ ' Measurement_num ' ] . values )
boxplot_labels . append ( test_name )
colors = plt . cm . Set3 ( np . linspace ( 0 , 1 , len ( plot_data) ) )
for i , box in enumerate ( bp [ ' boxes ' ] ) :
box . set ( facecolor = colors [ i ] , alpha = 0.7 )
if box plot_data:
box_plot = ax . boxplot ( boxplot_data , tick_labels = boxplot_labels , patch_artist = True )
ax2 . set_title ( " 多关键词箱线图对比 " , fontsize = 14 , fontweight = ' bold ' )
ax2 . s et_ylabel ( " 测量值 " , fontsize = 12 )
ax2 . grid ( True , alpha = 0.3 )
plt . tight_layout ( )
# 设置箱线图颜色
colors_box = plt . cm . S et3 ( np . linspace ( 0 , 1 , len ( boxplot_data ) ) )
for patch , color in zip ( box_plot [ ' boxes ' ] , colors_box ) :
patch . set_facecolor ( color )
comparison_plots . append ( {
" title " : " 箱线图对比 " ,
" image " : self . _plot_to_base64 ( fig2 )
} )
# 添加上下限线(使用第一个测试项的数据)
if boxplot_data :
first_test_data = processed_data [ processed_data [ " Test Name New " ] == test_names [ 0 ] ]
lower_plot , upper_plot , _ , _ = self . _extract_limits ( first_test_data )
if lower_plot is not None :
ax . axhline ( y = lower_plot , color = ' green ' , linestyle = ' -- ' , linewidth = 2 , label = " Lower Limit " )
if upper_plot is not None :
ax . axhline ( y = upper_plot , color = ' red ' , linestyle = ' -- ' , linewidth = 2 , label = " Upper Limit " )
# 3. 分布直方图对比
fig3 , ax3 = plt . subplots ( figsize = ( 12 , 6 ) )
has_hist_data = False
ax . set_title ( f " 箱线图 - { keyword } 组 " , fontsize = 12 )
ax . set_ylabel ( " 测量值 " )
ax . tick_params ( axis = ' x ' , rotation = 45 )
ax . grid ( True , alpha = 0.3 )
ax . legend ( )
colors = plt . cm . Set3 ( np . linspace ( 0 , 1 , len ( keywords ) ) )
plt . tight_layout ( )
comparison_plots . append ( {
" title " : " 箱线图(按关键词分组) " ,
" image " : self . _plot_to_base64 ( fig2 ) ,
" description " : " 每个关键词组单独显示,组内测试项分别绘制箱线图 "
} )
for i , keyword in enumerate ( keywords ) :
keyword_mask = filtered_df [ " Test Name New " ] . astype ( str ) . str . contains (
re . escape ( keyword ) , case = False , na = False
)
keyword_data = filtered_df . loc [ keyword_mask ] . copy ( )
# 3. 概率分布直方图 - 按测试项分组显示
if num_tests_total > 0 :
max_cols = 2
num_rows = ( num_tests_total + max_cols - 1 ) / / max_cols
if not keyword_data . empty :
keyword_data = self . _preprocess_test_data ( keyword_data )
if not keyword_data . empty :
# 动态调整直方图bins数量
n_bins = min ( 20 , max ( 5 , len ( keyword_data ) / / 10 ) )
ax3 . hist ( keyword_data [ ' Measurement_num ' ] . values ,
bins = n_bins ,
alpha = 0.6 ,
label = f " { keyword } (n= { len ( keyword_data ) } ) " ,
color = colors [ i ] ,
density = True ) # 使用密度而不是频次
has_hist_data = True
fig3 , axes = plt . subplots ( num_rows , max_cols , figsize = ( 16 , 6 * num_rows ) )
if num_rows == 1 :
axes = axes . reshape ( 1 , - 1 )
elif num_tests_total == 1 :
axes = np . array ( [ [ axes ] ] )
if has_hist_data :
ax3 . set_title ( " 多关键词分布直方图对比 " , fontsize = 14 , fontweight = ' bold ' )
ax3 . set_xlabel ( " 测量值 " , fontsize = 12 )
ax3 . set_ylabel ( " 密度 " , fontsize = 12 )
ax3 . legend ( )
ax3 . grid ( True , alpha = 0.3 )
plt . tight_layout ( )
axes = axes . flatten ( )
comparison_plots . append ( {
" title " : " 分布直方图对比 " ,
" image " : self . _plot_to_base64 ( fig3 )
} )
plot_idx = 0
colors_hist = plt . cm . Set3 ( np . linspace ( 0 , 1 , 12 ) )
except Exception as e :
self . _print_warning ( f " 创建对比图时出错: { e } " )
import traceback
traceback . print_exc ( )
for keyword , test_names in keyword_groups . items ( ) :
for test_name in test_names :
if plot_idx > = len ( axes ) :
break
test_data = processed_data [ processed_data [ " Test Name New " ] == test_name ]
if test_data . empty or len ( test_data ) < 2 :
continue
ax = axes [ plot_idx ]
# 绘制概率分布直方图
sns . histplot ( test_data [ ' Measurement_num ' ] , kde = True ,
color = colors_hist [ plot_idx % len ( colors_hist ) ] , alpha = 0.7 , ax = ax )
# 添加上下限线
lower_plot , upper_plot , _ , _ = self . _extract_limits ( test_data )
if lower_plot is not None :
ax . axvline ( x = lower_plot , color = ' green ' , linestyle = ' -- ' , linewidth = 2 , label = " Lower Limit " )
if upper_plot is not None :
ax . axvline ( x = upper_plot , color = ' red ' , linestyle = ' -- ' , linewidth = 2 , label = " Upper Limit " )
ax . set_title ( f " { test_name } \n (关键词: { keyword } ) " , fontsize = 10 )
ax . set_xlabel ( " 测量值 " )
ax . set_ylabel ( " 频率 " )
ax . grid ( True , alpha = 0.3 )
ax . legend ( )
plot_idx + = 1
# 隐藏多余的子图
for idx in range ( plot_idx , len ( axes ) ) :
axes [ idx ] . set_visible ( False )
plt . tight_layout ( )
comparison_plots . append ( {
" title " : " 概率分布直方图(按测试项分组) " ,
" image " : self . _plot_to_base64 ( fig3 ) ,
" description " : " 每个测试项单独显示概率分布和上下限 "
} )
return comparison_plots
def generate_html_report ( self , filtered_df : pd . DataFrame , keywords : List [ str ] ,
unique_tests : List [ str ] ) - > None :
""" 生成HTML报告 - 修复对比图显示问题 """
def _organize_tests_by_keyword_groups ( self , test_results : List [ Dict [ str , Any ] ] ,
keyword_groups : Dict [ str , List [ str ] ] ) - > Dict [
str , Dict [ str , List [ Dict [ str , Any ] ] ] ] :
""" 按关键词分组组织测试结果,每个测试项单独显示 """
organized_groups = { }
# 构建测试名称到测试结果的映射
test_name_to_result = { test [ ' name ' ] : test for test in test_results }
# 为每个关键词组分配测试结果
for keyword , test_names in keyword_groups . items ( ) :
group_tests = { }
for test_name in test_names :
if test_name in test_name_to_result :
# 每个测试项单独作为一个分组
group_tests [ test_name ] = [ test_name_to_result [ test_name ] ]
if group_tests :
organized_groups [ keyword ] = group_tests
return organized_groups
def generate_html_report ( self , filtered_df : pd . DataFrame , keyword : str ,
unique_tests : List [ str ] , keyword_groups : Dict [ str , List [ str ] ] ) - > None :
""" 生成HTML报告 """
self . _print_stage ( " 生成HTML报告 " )
start_time = time . time ( )
@@ -1238,16 +1278,6 @@ class MultiFileTestReportScatterPlotter:
total_points = 0
status_counts = { " success " : 0 , " warning " : 0 , " danger " : 0 }
# 生成多关键词对比图
print ( f " 🔍 调试: 开始生成对比图,关键词数量: { len ( keywords ) } " )
comparison_plot_images = self . _create_comparison_plots ( filtered_df , keywords )
print ( f " 🔍 调试: 对比图生成完成,数量: { len ( comparison_plot_images ) } " )
# 调试输出对比图信息
for i , plot in enumerate ( comparison_plot_images ) :
print ( f " - 对比图 { i + 1 } : { plot [ ' title ' ] } , 图像大小: { len ( plot [ ' image ' ] ) } 字符 " )
# 生成各测试项的详细图表
for i , test_name in enumerate ( unique_tests , 1 ) :
self . _print_progress ( i , len ( unique_tests ) , " 生成测试报告 " )
@@ -1287,14 +1317,16 @@ class MultiFileTestReportScatterPlotter:
" status_display " : status_info [ " status_display " ]
} )
# 渲染HTML模板前再次验证数据
print ( f " 🔍 调试: 传递给模板的对比图数量: { len ( comparison_plot_images ) } " )
# 按关键词分组组织测试结果,每个测试项单独显示
organized_keyword_groups = self . _organize_tests_by_keyword_groups ( test_results , keyword_groups )
# 生成汇总对比报告图表
comparison_plots = self . _create_comparison_plots ( filtered_df , keyword_groups )
# 渲染HTML模板
template = Template ( HTML_TEMPLATE )
html_content = template . render (
keywords = keywords if keywords else [ " 所有数据 " ] ,
keywords_display = " , " . join ( keywords ) if keywords else " 所有数据 " ,
keyword = keyword if keyword else " 所有数据 " ,
timestamp = datetime . now ( ) . strftime ( " % Y- % m- %d % H: % M: % S " ) ,
test_count = len ( test_results ) ,
total_points = total_points ,
@@ -1306,15 +1338,17 @@ class MultiFileTestReportScatterPlotter:
file_count = len ( self . file_infos ) ,
file_infos = self . file_infos ,
total_rows = len ( self . df ) if self . df is not None else 0 ,
comparison_plot_image s = comparison_plot_images # 确保传递
comparison_plots = comparison_plots ,
keyword_groups = organized_keyword_groups
)
# 调试: 检查生成的HTML内容
if comparison_plot_image s :
if " comparison_plot_image s " in html_content or " 时间序列对比 图 " in html_content :
print ( " ✅ 对比图已成功嵌入HTML" )
if comparison_plots :
if " comparison_plots " in html_content or " 时间序列散点 图 " in html_content :
self . _print_success ( f " 对比图已成功嵌入HTML " )
else :
print( " ❌ 对比图未正确嵌入HTML" )
# print(" ❌ 对比图未正确嵌入HTML" )
self . _print_error ( " 对比图未正确嵌入HTML " )
# 保存HTML文件
with open ( self . html_report_path , ' w ' , encoding = ' utf-8 ' ) as f :
@@ -1323,8 +1357,8 @@ class MultiFileTestReportScatterPlotter:
self . _print_success ( f " HTML报告已生成: { self . html_report_path } " )
self . _print_success (
f " 共处理 { len ( self . file_infos ) } 个文件, { len ( test_results ) } 个测试项, { total_points } 个数据点 " )
if len ( keywords ) > 1 :
self . _print_success ( f " 已生成 { len ( comparison_plot_image s ) } 个对比图表 " )
if len ( keyword_group s ) > 1 :
self . _print_success ( f " 已生成 { len ( comparison_plots ) } 个对比图表, { len ( keyword_groups ) } 个关键词: { " , " . join ( keyword_groups ) } 。 " )
def run ( self ) - > None :
""" 运行主程序 """
@@ -1340,20 +1374,20 @@ class MultiFileTestReportScatterPlotter:
self . load_multiple_files_optimized ( excel_files )
while True :
filtered_df , keywords , unique_tests = self . get_keywords ( ) # 修改方法调用
# 修改为使用多关键词输入方法,返回关键词分组信息
filtered_df , keyword , unique_tests , keyword_groups = self . get_keywords ( )
if filtered_df . empty :
self . _print_warning ( " 没有数据可处理,退出程序 " )
break
safe_keyword_text = " _ " . join ( [ self . _safe_filename ( k ) for k in keywords ] ) if keywords else " all_data "
self . create_output_dir ( safe_keyword_text )
self . generate_html_report ( filtered_df , keywords , unique_tests ) # 修改参数
self . create_output_dir ( keyword )
self . generate_html_report ( filtered_df , keyword , unique_tests , keyword_groups )
self . _print_success ( " 分析完成! " )
print ( f " 📊 报告文件: { self . html_report_path } " )
print ( f " 📁 输出目录: { self . output_dir } " )
if len ( keywords ) > 1 :
print ( f " 🔍 对比关键词: { ' , ' . join ( keywords ) } " )
if len ( keyword_group s ) > 1 :
print ( f " 🔍 对比关键词/组 : { ' , ' . join ( keyword_group s ) } " )
# 询问是否继续分析其他关键词
continue_choice = input ( " \n 是否继续分析其他关键词?(y/n): " ) . strip ( ) . lower ( )