Python脚本开发文件初始化
This commit is contained in:
926
htmlProcess/htmlReportProcess_Merge_picHtml_V1.py
Normal file
926
htmlProcess/htmlReportProcess_Merge_picHtml_V1.py
Normal file
@@ -0,0 +1,926 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
from matplotlib.lines import Line2D
|
||||
from typing import Optional, Tuple, List, Dict, Any, Union
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from jinja2 import Template
|
||||
from colorama import Fore, Style, init
|
||||
|
||||
# Silence SettingWithCopy warnings so console output stays readable
pd.options.mode.chained_assignment = None

# Font fallbacks that can render CJK characters in matplotlib output
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans', 'Arial Unicode MS', 'Microsoft YaHei']
# Keep the minus sign renderable when a CJK font is active
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
# Jinja2 HTML template for the report — includes the per-SN plot sections
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>测试报告分析 - {{ keyword }}</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            border-radius: 10px;
            margin-bottom: 20px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
        .test-card {
            background: white;
            border-radius: 10px;
            padding: 20px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            transition: transform 0.2s ease;
        }
        .test-card:hover {
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
        }
        .test-header {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 15px;
            padding-bottom: 10px;
            border-bottom: 2px solid #eaeaea;
        }
        .test-title {
            font-size: 18px;
            font-weight: bold;
            color: #333;
        }
        .test-stats {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
            margin-bottom: 15px;
        }
        .stat-item {
            background: #f8f9fa;
            padding: 12px;
            border-radius: 8px;
            text-align: center;
        }
        .stat-label {
            font-size: 12px;
            color: #666;
            margin-bottom: 5px;
        }
        .stat-value {
            font-size: 16px;
            font-weight: bold;
            color: #333;
        }
        .plot-container {
            text-align: center;
            margin: 20px 0;
        }
        .plot-image {
            max-width: 100%;
            height: auto;
            border-radius: 8px;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
        }
        .sn-plots-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
            gap: 20px;
            margin: 20px 0;
        }
        .sn-plot-item {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            text-align: center;
        }
        .sn-plot-title {
            font-size: 14px;
            font-weight: bold;
            margin-bottom: 10px;
            color: #555;
        }
        .summary {
            background: white;
            border-radius: 10px;
            padding: 20px;
            margin-top: 20px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }
        .summary-item {
            margin: 10px 0;
            padding: 10px;
            background: #f8f9fa;
            border-radius: 6px;
        }
        .timestamp {
            text-align: center;
            color: #666;
            margin-top: 30px;
            font-size: 12px;
        }
        .badge {
            padding: 4px 8px;
            border-radius: 12px;
            font-size: 12px;
            font-weight: bold;
        }
        .badge-success {
            background: #d4edda;
            color: #155724;
        }
        .badge-warning {
            background: #fff3cd;
            color: #856404;
        }
        .badge-danger {
            background: #f8d7da;
            color: #721c24;
        }
        .section-title {
            font-size: 16px;
            font-weight: bold;
            margin: 20px 0 10px 0;
            color: #333;
            border-left: 4px solid #667eea;
            padding-left: 10px;
        }
    </style>
</head>
<body>
    <div class="header">
        <h1>📊 测试报告分析</h1>
        <p>关键词: <strong>{{ keyword }}</strong> | 生成时间: {{ timestamp }}</p>
        <p>共分析 {{ test_count }} 个测试项,{{ total_points }} 个数据点</p>
    </div>

    {% for test in tests %}
    <div class="test-card">
        <div class="test-header">
            <div class="test-title">📋 {{ test.name }}</div>
            <div class="badge badge-{{ test.status }}">
                {{ test.status_display }}
            </div>
        </div>

        <div class="test-stats">
            <div class="stat-item">
                <div class="stat-label">数据点数</div>
                <div class="stat-value">{{ test.stats.count }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">平均值</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.mean) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">中位数</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.median) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">标准差</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.std) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">最小值</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.min) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">最大值</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.max) }}</div>
            </div>
        </div>

        {% if test.limits.lower is not none or test.limits.upper is not none %}
        <div class="test-stats">
            {% if test.limits.lower is not none %}
            <div class="stat-item">
                <div class="stat-label">下限值</div>
                <div class="stat-value">{{ "%.4f"|format(test.limits.lower) }}</div>
            </div>
            {% endif %}
            {% if test.limits.upper is not none %}
            <div class="stat-item">
                <div class="stat-label">上限值</div>
                <div class="stat-value">{{ "%.4f"|format(test.limits.upper) }}</div>
            </div>
            {% endif %}
        </div>
        {% endif %}

        <!-- 汇总图 -->
        <div class="section-title">📈 汇总视图 (所有SN)</div>
        <div class="plot-container">
            <img src="data:image/png;base64,{{ test.summary_plot_image }}" alt="{{ test.name }} 汇总散点图" class="plot-image">
        </div>

        <!-- SN独立图 -->
        {% if test.sn_plot_images %}
        <div class="section-title">🔍 SN独立视图 ({{ test.sn_plot_images|length }}个SN)</div>
        <div class="sn-plots-container">
            {% for sn_plot in test.sn_plot_images %}
            <div class="sn-plot-item">
                <div class="sn-plot-title">SN: {{ sn_plot.sn }}</div>
                <img src="data:image/png;base64,{{ sn_plot.image }}" alt="{{ test.name }} - SN {{ sn_plot.sn }} 散点图" class="plot-image">
            </div>
            {% endfor %}
        </div>
        {% endif %}
    </div>
    {% endfor %}

    <div class="summary">
        <h3>📈 分析摘要</h3>
        <div class="summary-item">
            <strong>文件路径:</strong> {{ file_path }}
        </div>
        <div class="summary-item">
            <strong>分析时间:</strong> {{ analysis_time }}秒
        </div>
        <div class="summary-item">
            <strong>测试项分布:</strong>
            <ul>
                <li>正常: {{ status_counts.normal }} 个</li>
                <li>警告: {{ status_counts.warning }} 个</li>
                <li>异常: {{ status_counts.abnormal }} 个</li>
            </ul>
        </div>
    </div>

    <div class="timestamp">
        报告生成于 {{ timestamp }} | 测试报告分析系统
    </div>
</body>
</html>
"""
|
||||
|
||||
|
||||
class TestReportScatterPlotter:
    """Interactive analyzer: loads an Excel test report, filters rows by a
    user keyword on 'Test Name New', and renders summary and per-SN scatter
    plots into a single self-contained HTML report."""

    def __init__(self):
        # Absolute path of the selected report file
        self.file_path: Optional[str] = None
        # Raw data loaded from the Excel sheet
        self.df: Optional[pd.DataFrame] = None
        # Directory where the HTML report is written
        self.output_dir: Optional[str] = None
        # Columns the report sheet must contain
        self.required_columns = ["Test Name New", "SN", "Measurement", "Test Time", "Lower Limit", "Upper Limit", ]
        # Detected lower/upper limit column names (set by load_data)
        self.col_lower: Optional[str] = None
        self.col_upper: Optional[str] = None
        # Full path of the generated HTML file (set by create_output_dir)
        self.html_report_path: Optional[str] = None

        # Cache for processed data — NOTE(review): currently never written to
        self._processed_data_cache: Dict[str, Any] = {}
|
||||
|
||||
def _print_stage(self, msg: str) -> None:
|
||||
"""统一的阶段信息输出"""
|
||||
print(f"\n{'=' * 30}\n{msg}\n{'=' * 30}")
|
||||
|
||||
def _print_progress(self, current: int, total: int, prefix: str = "进度") -> None:
|
||||
"""改进的进度条显示"""
|
||||
if total <= 0:
|
||||
return
|
||||
|
||||
percent = (current / total) * 100
|
||||
bar_len = 30
|
||||
filled = int(bar_len * current / total)
|
||||
bar = "█" * filled + "-" * (bar_len - filled)
|
||||
sys.stdout.write(f"\r{prefix}: [{bar}] {current}/{total} ({percent:.1f}%)")
|
||||
sys.stdout.flush()
|
||||
if current == total:
|
||||
print() # 换行
|
||||
|
||||
    def get_file_path(self) -> None:
        """Prompt repeatedly until the user supplies an existing file path.

        Stores the resolved absolute path in self.file_path.
        """
        self._print_stage("输入文件路径")

        while True:
            print(f"{Fore.WHITE}请输入测试报告文件路径(.xlsx): ")
            file_path = input("> ").strip()

            # Empty input: just prompt again
            if not file_path:
                continue

            path_obj = Path(file_path)
            if path_obj.exists():
                # Normalize to an absolute path
                self.file_path = str(path_obj.resolve())
                print(f"已选择文件: {self.file_path}")
                break
            else:
                print(f"文件不存在: {file_path},请重新输入")
|
||||
|
||||
def _find_column_case_insensitive(self, candidates: List[str]) -> Optional[str]:
|
||||
"""优化的大小写不敏感列查找"""
|
||||
if self.df is None:
|
||||
return None
|
||||
|
||||
columns_lower = {col.lower().strip(): col for col in self.df.columns}
|
||||
for candidate in candidates:
|
||||
key = candidate.lower().strip()
|
||||
if key in columns_lower:
|
||||
return columns_lower[key]
|
||||
return None
|
||||
|
||||
    def load_data(self) -> None:
        """Load the Excel report into self.df, picking an engine by extension.

        Reads only the sheet "Merged All Tests" or "All Tests" (first match
        wins), restricts the read to required_columns when they all exist,
        and records the detected lower/upper limit column names.

        Raises:
            FileNotFoundError: the stored file path no longer exists.
            ValueError: not an Excel file, target sheet missing, or empty.
            KeyError: a required column is missing.
            RuntimeError: the file cannot be opened/read by any engine.
        """
        self._print_stage("加载数据")
        start_time = time.time()

        # Ensure the file still exists at read time
        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"文件不存在: {self.file_path}")

        # Choose the preferred engine from the file extension
        file_ext = self.file_path.lower()
        if file_ext.endswith('.xlsx'):
            # Engine preference for .xlsx files
            engine_options = ['openpyxl', 'calamine']  # calamine needs installing and may be faster
            engine = 'openpyxl'  # default
        elif file_ext.endswith('.xls'):
            # Engine preference for .xls files
            # NOTE(review): engine_options is assigned but never used below
            engine_options = ['xlrd', 'calamine']
            engine = 'xlrd'  # default
        else:
            raise ValueError("输入文件不是有效的 Excel 文件(应为 .xls 或 .xlsx 格式)")

        # Fetch the sheet names cheaply, without loading cell data
        try:
            if engine == 'openpyxl':
                import openpyxl
                workbook = openpyxl.load_workbook(self.file_path, read_only=True)
                sheet_names = workbook.sheetnames
                workbook.close()
            elif engine == 'xlrd':
                import xlrd
                workbook = xlrd.open_workbook(self.file_path, on_demand=True)
                sheet_names = workbook.sheet_names()
                workbook.release_resources()
            else:
                # Fall back to pandas' lightweight sheet listing
                excel_file = pd.ExcelFile(self.file_path, engine=engine)
                sheet_names = excel_file.sheet_names
        except Exception as e:
            raise RuntimeError(f"无法打开 Excel 文件,请确认该文件未被损坏或占用。错误: {type(e).__name__}: {e}")

        # Worksheet names to look for, in priority order
        target_sheets = ["Merged All Tests", "All Tests"]
        selected_sheet = None

        for sheet in target_sheets:
            if sheet in sheet_names:
                selected_sheet = sheet
                break

        if selected_sheet is None:
            raise ValueError(
                f"未找到指定的工作表: {' 或 '.join(target_sheets)}。"
                f"当前文件包含的工作表有: {sheet_names}"
            )

        try:
            # Performance: read raw objects and skip NA detection
            read_excel_kwargs = {
                # 'filepath_or_buffer': self.file_path,
                'io': self.file_path,  # pd.read_excel takes 'io', not 'filepath_or_buffer'
                'sheet_name': selected_sheet,
                'engine': engine,
                'dtype': 'object',  # uniform object dtype avoids type inference cost
                'na_filter': False,  # disabling NA filtering speeds up the read
            }

            # When required columns are declared, restrict the read to them
            if hasattr(self, 'required_columns') and self.required_columns:
                # First determine which of them actually exist
                try:
                    # Lightweight header probe: one data row only
                    sample_df = pd.read_excel(
                        self.file_path,
                        sheet_name=selected_sheet,
                        engine=engine,
                        nrows=1  # read a single row just to obtain the column names
                    )
                    existing_columns = [col for col in self.required_columns if col in sample_df.columns]

                    if len(existing_columns) < len(self.required_columns):
                        missing = set(self.required_columns) - set(existing_columns)
                        raise KeyError(f"缺少必要列: {list(missing)}")

                    read_excel_kwargs['usecols'] = existing_columns

                    # print(f"使用 read_excel_kwargs 读取excel:\n {read_excel_kwargs}")
                    # Dump the effective kwargs for debugging
                    print("使用 read_excel_kwargs 读取excel:")
                    for key, value in read_excel_kwargs.items():
                        print(f"  {key}: {repr(value)}")  # repr keeps special characters visible

                except Exception as e:
                    print(f"列检查失败,将读取所有列: {e}")
                    # Column probe failed: fall back to reading every column


            # Perform the actual read
            self._print_stage("执行数据读取")
            self.df = pd.read_excel(**read_excel_kwargs)

        except Exception as e:
            # Primary engine failed — retry with pandas' auto-selected engine
            print(f"引擎 {engine} 读取失败,尝试备选引擎...\n{e}")
            try:
                # Minimal fallback read
                self.df = pd.read_excel(
                    self.file_path,
                    sheet_name=selected_sheet,
                    engine=None  # let pandas choose the engine
                )
            except Exception as fallback_e:
                raise RuntimeError(
                    f"读取 Excel 失败,工作表: '{selected_sheet}'。"
                    f"主引擎错误: {type(e).__name__}: {e}\n"
                    f"备选引擎错误: {type(fallback_e).__name__}: {fallback_e}"
                )

        if self.df.empty:
            raise ValueError("工作表为空,无法处理")

        # Re-validate required columns (needed when usecols was not applied)
        if hasattr(self, 'required_columns') and self.required_columns:
            missing_columns = [col for col in self.required_columns if col not in self.df.columns]
            if missing_columns:
                raise KeyError(f"缺少必要列: {missing_columns}")

        # Record the detected lower/upper limit column names
        self.col_lower = self._find_column_case_insensitive([
            "Lower Limit", "lower limit", "lower_limit", "ll", "lower"
        ])
        self.col_upper = self._find_column_case_insensitive([
            "Upper Limit", "upper limit", "upper_limit", "ul", "upper"
        ])

        loading_time = time.time() - start_time
        print(f"数据加载完成: {len(self.df)} 行 × {self.df.shape[1]} 列")
        print(f"使用引擎: {engine}")
        print(f"耗时: {loading_time:.2f}s")

        # Summary of detected limit columns
        print(f"检测到下限列: {self.col_lower or '无'}")
        print(f"检测到上限列: {self.col_upper or '无'}")

        # Optional: dtype conversion when concrete column types are known
        # self._convert_data_types()
|
||||
|
||||
# 可以添加这个方法进行类型转换优化
|
||||
def _convert_data_types(self):
|
||||
"""优化数据类型转换"""
|
||||
if self.df is None or self.df.empty:
|
||||
return
|
||||
|
||||
# 根据列名模式推断数据类型
|
||||
numeric_patterns = ['limit', 'value', 'measure', 'result', 'score']
|
||||
date_patterns = ['date', 'time', 'period']
|
||||
|
||||
for col in self.df.columns:
|
||||
col_lower = str(col).lower()
|
||||
|
||||
# 数值类型转换
|
||||
if any(pattern in col_lower for pattern in numeric_patterns):
|
||||
self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
|
||||
# 日期类型转换
|
||||
elif any(pattern in col_lower for pattern in date_patterns):
|
||||
self.df[col] = pd.to_datetime(self.df[col], errors='coerce')
|
||||
|
||||
|
||||
    def get_keyword(self) -> Tuple[pd.DataFrame, str, List[str]]:
        """Prompt for a keyword and filter rows whose 'Test Name New' contains it.

        Returns:
            (filtered_df, keyword, unique_tests). filtered_df is empty when
            the data is unusable or the user aborts; keyword is "" when the
            user chose option 2 (analyse all rows).
        """
        self._print_stage("筛选关键词")

        while True:
            keyword = input("请输入筛选关键词(匹配 'Test Name New'): ").strip()

            if not keyword:
                print("❌ 关键词不能为空,请重新输入")
                continue

            # Nothing to filter when the frame is empty
            if self.df.empty:
                print("⚠️ 数据框为空,无法进行筛选")
                return pd.DataFrame(), keyword, []

            # The filter column must exist
            if "Test Name New" not in self.df.columns:
                print("❌ 列 'Test Name New' 不存在于数据框中")
                print(f"可用列: {list(self.df.columns)}")
                return pd.DataFrame(), keyword, []

            try:
                # Case-insensitive substring match; NaN rows never match
                mask = self.df["Test Name New"].astype(str).str.contains(keyword, case=False, na=False)
                filtered_df = self.df.loc[mask].copy()

                if filtered_df.empty:
                    # No match: guide the user instead of raising
                    print(f"⚠️ 没有找到包含关键词 '{keyword}' 的测试项")

                    # Show a few available test names as a hint
                    available_tests = self.df["Test Name New"].dropna().unique()
                    if len(available_tests) > 0:
                        print("📋 可用的测试项示例:")
                        for test in available_tests[:5]:  # only the first 5 as reference
                            print(f"  - {test}")
                        if len(available_tests) > 5:
                            print(f"  ... 还有 {len(available_tests) - 5} 个测试项")

                    # Let the user retry, analyse everything, or abort
                    choice = input("请选择: 1-重新输入关键词 2-使用所有数据 3-退出当前操作: ")
                    if choice == "1":
                        continue
                    elif choice == "2":
                        filtered_df = self.df.copy()
                        unique_tests = filtered_df["Test Name New"].unique().tolist()
                        print(f"✅ 使用所有数据: {len(filtered_df)} 行,{len(unique_tests)} 个测试项")
                        return filtered_df, "", unique_tests
                    else:
                        print("👋 退出筛选操作")
                        return pd.DataFrame(), keyword, []
                else:
                    unique_tests = filtered_df["Test Name New"].unique().tolist()
                    print(f"✅ 匹配到 {len(filtered_df)} 行数据,涉及 {len(unique_tests)} 个不同测试项")
                    return filtered_df, keyword, unique_tests

            except Exception as e:
                print(f"❌ 筛选过程中发生错误: {e}")
                print("请检查数据格式或重新输入关键词")
                continue
|
||||
|
||||
def create_output_dir(self, keyword) -> None:
|
||||
"""创建输出目录"""
|
||||
self._print_stage("创建输出目录")
|
||||
|
||||
if not self.file_path:
|
||||
raise ValueError("文件路径未设置")
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
base_dir = os.path.dirname(self.file_path)
|
||||
# self.output_dir = os.path.join(base_dir, f"scatter_report_{timestamp}")
|
||||
self.output_dir = os.path.join(base_dir, f"scatter_report_out")
|
||||
self.html_report_path = os.path.join(self.output_dir, f"{keyword}_report_{timestamp}.html")
|
||||
|
||||
os.makedirs(self.output_dir, exist_ok=True)
|
||||
print(f"输出目录: {self.output_dir}")
|
||||
|
||||
@staticmethod
|
||||
def _safe_filename(name: str) -> str:
|
||||
"""生成安全的文件名"""
|
||||
safe = "".join(c for c in str(name) if c.isalnum() or c in (" ", "_", "-")).strip()
|
||||
return safe or "Unknown_Test"
|
||||
|
||||
    def _extract_limits(self, df_one_test: pd.DataFrame) -> Tuple[
        Optional[float], Optional[float], List[float], List[float]]:
        """Extract numeric lower/upper limit values for one test.

        Returns:
            (lower_plot, upper_plot, lower_set, upper_set): lower_plot is the
            smallest distinct lower limit, upper_plot the largest distinct
            upper limit (the most permissive of each), and the two sets are
            all distinct values seen, sorted. Plot values are None when the
            column is absent or holds no parseable numbers.
        """
        lower_plot = upper_plot = None
        lower_set = []
        upper_set = []

        if self.col_lower and self.col_lower in df_one_test.columns:
            lower_vals = self._clean_and_convert_series(df_one_test[self.col_lower], 'numeric').dropna().unique()
            lower_set = sorted(lower_vals.tolist()) if len(lower_vals) > 0 else []
            if lower_set:
                # Most permissive (lowest) lower limit is used for plotting
                lower_plot = min(lower_set)

        if self.col_upper and self.col_upper in df_one_test.columns:
            upper_vals = self._clean_and_convert_series(df_one_test[self.col_upper], 'numeric').dropna().unique()
            upper_set = sorted(upper_vals.tolist()) if len(upper_vals) > 0 else []
            if upper_set:
                # Most permissive (highest) upper limit is used for plotting
                upper_plot = max(upper_set)

        return lower_plot, upper_plot, lower_set, upper_set
|
||||
|
||||
@staticmethod
|
||||
def _clean_and_convert_series(series: pd.Series, target_type: str = 'numeric') -> pd.Series:
|
||||
"""统一的系列清洗和转换方法 - 修复了 ast 方法名错误"""
|
||||
if series.empty:
|
||||
return series
|
||||
|
||||
if target_type == 'numeric':
|
||||
# 数值转换优化
|
||||
if pd.api.types.is_numeric_dtype(series):
|
||||
return series.astype(float)
|
||||
|
||||
# 批量字符串处理 - 修复这里的问题
|
||||
cleaned = series.astype(str).str.replace(r'[, ]', '', regex=True).str.strip()
|
||||
return pd.to_numeric(cleaned, errors='coerce')
|
||||
|
||||
elif target_type == 'datetime':
|
||||
return TestReportScatterPlotter._convert_to_datetime(series)
|
||||
|
||||
return series
|
||||
|
||||
    @staticmethod
    def _convert_to_datetime(series: pd.Series) -> pd.Series:
        """Best-effort conversion of a mixed column to datetime64[ns].

        Interpretation order: millisecond epochs (>= 1e11), second epochs
        ([1e9, 1e11)), Excel serial day numbers ((20000, 60000), origin
        1899-12-30), the explicit '%Y-%m-%d %H-%M-%S' string format, then
        pandas' generic parser. Unparseable entries become NaT.
        """
        if pd.api.types.is_datetime64_any_dtype(series):
            return series

        # Prepare numeric and string views of the same data
        numeric_series = pd.to_numeric(series, errors='coerce')
        string_series = series.astype(str).str.strip()

        result = pd.Series(pd.NaT, index=series.index, dtype='datetime64[ns]')

        # Numeric timestamp heuristics — value range picks the interpretation
        masks = {
            'ms': numeric_series >= 1e11,
            's': (numeric_series >= 1e9) & (numeric_series < 1e11),
            'excel': (numeric_series > 20000) & (numeric_series < 60000)
        }

        for mask_type, mask in masks.items():
            if mask.any():
                if mask_type == 'ms':
                    result.loc[mask] = pd.to_datetime(numeric_series.loc[mask], unit='ms')
                elif mask_type == 's':
                    result.loc[mask] = pd.to_datetime(numeric_series.loc[mask], unit='s')
                elif mask_type == 'excel':
                    # Excel's day-zero epoch (accounts for the 1900 leap-year bug)
                    origin = pd.Timestamp('1899-12-30')
                    result.loc[mask] = origin + pd.to_timedelta(numeric_series.loc[mask], unit='D')

        # String parsing for everything still unresolved
        remaining_mask = result.isna()
        if remaining_mask.any():
            remaining_strings = string_series.loc[remaining_mask]

            # Known explicit formats take priority over generic parsing
            format_patterns = [
                (r'^\d{4}-\d{2}-\d{2} \d{2}-\d{2}-\d{2}$', '%Y-%m-%d %H-%M-%S'),
            ]

            for pattern, date_format in format_patterns:
                format_mask = remaining_strings.str.match(pattern)
                if format_mask.any():
                    # Map positions in the remaining subset back to original labels
                    result.loc[remaining_mask[remaining_mask].index[format_mask]] = pd.to_datetime(
                        remaining_strings.loc[format_mask], format=date_format, errors='coerce'
                    )

            # Generic parser as the last resort
            still_na_mask = result.isna() & remaining_mask
            if still_na_mask.any():
                result.loc[still_na_mask] = pd.to_datetime(
                    string_series.loc[still_na_mask], errors='coerce'
                )

        return result
|
||||
|
||||
    def _preprocess_test_data(self, test_data: pd.DataFrame) -> pd.DataFrame:
        """Add numeric/datetime helper columns, drop invalid rows, sort by time.

        NOTE(review): adds 'Measurement_num' and 'TestTime_dt' columns to the
        frame passed in — callers currently pass a .copy(); keep doing so.
        """
        # Numeric measurement values (unparseable -> NaN)
        test_data['Measurement_num'] = self._clean_and_convert_series(
            test_data['Measurement'], 'numeric'
        )
        test_data['TestTime_dt'] = self._clean_and_convert_series(
            test_data['Test Time'], 'datetime'
        )

        # Drop rows where either conversion failed, then order chronologically
        valid_data = test_data.dropna(subset=['Measurement_num', 'TestTime_dt'])
        return valid_data.sort_values('TestTime_dt')
|
||||
|
||||
def _calculate_statistics(self, y_data: pd.Series) -> Dict[str, float]:
|
||||
"""计算统计信息"""
|
||||
stats = {
|
||||
'count': len(y_data),
|
||||
'mean': y_data.mean(),
|
||||
'median': y_data.median(),
|
||||
'min': y_data.min(),
|
||||
'max': y_data.max(),
|
||||
'std': y_data.std(),
|
||||
'q1': y_data.quantile(0.25),
|
||||
'q3': y_data.quantile(0.75)
|
||||
}
|
||||
return stats
|
||||
|
||||
def _plot_to_base64(self, fig) -> str:
|
||||
"""将图表转换为base64编码"""
|
||||
buf = BytesIO()
|
||||
fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
|
||||
buf.seek(0)
|
||||
img_str = base64.b64encode(buf.read()).decode('utf-8')
|
||||
plt.close(fig)
|
||||
return img_str
|
||||
|
||||
    def _create_summary_plot(self, test_data: pd.DataFrame, test_name: str,
                             lower_plot: Optional[float], upper_plot: Optional[float]) -> str:
        """Render one scatter plot with every SN overlaid; return base64 PNG.

        Expects test_data to carry the 'TestTime_dt' and 'Measurement_num'
        helper columns produced by _preprocess_test_data.
        """
        fig, ax = plt.subplots(figsize=(12, 8))

        # One scatter series per SN (single pseudo-group when SN is missing)
        groups = list(test_data.groupby("SN")) if "SN" in test_data.columns else [("Unknown_SN", test_data)]
        for sn, group in groups:
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       label=str(sn), alpha=0.7, s=25)

        # Statistics over all SNs combined
        y_data = test_data['Measurement_num']
        stats = self._calculate_statistics(y_data)

        # Time range used to bound the mean/median lines
        x_min, x_max = test_data['TestTime_dt'].min(), test_data['TestTime_dt'].max()

        if lower_plot is not None:
            ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=1.2, label="Lower Limit")
        if upper_plot is not None:
            ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=1.2, label="Upper Limit")

        # Mean / median reference lines limited to the observed time range
        ax.hlines(y=stats['mean'], xmin=x_min, xmax=x_max, colors='orange',
                  linestyles='-', linewidth=1.5, alpha=0.7, label='Mean')
        ax.hlines(y=stats['median'], xmin=x_min, xmax=x_max, colors='purple',
                  linestyles='-.', linewidth=1.5, alpha=0.7, label='Median')

        # Figure cosmetics
        ax.set_title(f"汇总图 - {test_name}")
        ax.set_xlabel("Test Time")
        ax.set_ylabel("Measurement Value")
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='x', rotation=45)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

        return self._plot_to_base64(fig)
|
||||
|
||||
    def _create_sn_plots(self, test_data: pd.DataFrame, test_name: str,
                         lower_plot: Optional[float], upper_plot: Optional[float]) -> List[Dict[str, str]]:
        """Render one independent scatter plot per SN.

        Returns:
            A list of {"sn": <sn>, "image": <base64 PNG>} dicts; empty when
            the data has no 'SN' column.
        """
        sn_plots = []

        # Cannot split per SN without the column
        if "SN" not in test_data.columns:
            return sn_plots

        sn_groups = test_data.groupby("SN")

        for sn, group in sn_groups:
            if group.empty:
                continue

            fig, ax = plt.subplots(figsize=(10, 6))

            # Scatter points for this SN only
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       color='blue', alpha=0.7, s=30, label=f"SN: {sn}")

            # Per-SN statistics
            y_data = group['Measurement_num']
            stats = self._calculate_statistics(y_data)

            # Time range of this SN bounds the mean/median lines
            x_min, x_max = group['TestTime_dt'].min(), group['TestTime_dt'].max()

            if lower_plot is not None:
                ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=1.2, label="Lower Limit")
            if upper_plot is not None:
                ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=1.2, label="Upper Limit")

            # Mean / median reference lines
            ax.hlines(y=stats['mean'], xmin=x_min, xmax=x_max, colors='orange',
                      linestyles='-', linewidth=1.5, alpha=0.7, label='Mean')
            ax.hlines(y=stats['median'], xmin=x_min, xmax=x_max, colors='purple',
                      linestyles='-.', linewidth=1.5, alpha=0.7, label='Median')

            # Figure cosmetics
            ax.set_title(f"SN独立图 - {test_name} (SN: {sn})")
            ax.set_xlabel("Test Time")
            ax.set_ylabel("Measurement Value")
            ax.grid(True, alpha=0.3)
            ax.tick_params(axis='x', rotation=45)
            ax.legend()

            # Encode and collect
            plot_image = self._plot_to_base64(fig)
            sn_plots.append({"sn": str(sn), "image": plot_image})

        return sn_plots
|
||||
|
||||
def _determine_test_status(self, stats: Dict[str, float],
|
||||
lower_limit: Optional[float],
|
||||
upper_limit: Optional[float]) -> Dict[str, Any]:
|
||||
"""确定测试状态"""
|
||||
status = "success"
|
||||
status_display = "正常"
|
||||
|
||||
if lower_limit is not None and upper_limit is not None:
|
||||
# 检查是否超出限值
|
||||
if stats['min'] < lower_limit or stats['max'] > upper_limit:
|
||||
status = "danger"
|
||||
status_display = "异常"
|
||||
elif (stats['mean'] < lower_limit * 1.1 or stats['mean'] > upper_limit * 0.9 or
|
||||
stats['std'] > (upper_limit - lower_limit) * 0.2):
|
||||
status = "warning"
|
||||
status_display = "警告"
|
||||
|
||||
return {"status": status, "status_display": status_display}
|
||||
|
||||
    def generate_html_report(self, filtered_df: pd.DataFrame, keyword: str,
                             unique_tests: List[str]) -> None:
        """Build plots and statistics per test and render them into HTML.

        Writes the rendered report to self.html_report_path, which must have
        been set by create_output_dir beforehand.
        """
        self._print_stage("生成HTML报告")
        start_time = time.time()

        test_results = []
        total_points = 0
        status_counts = {"success": 0, "warning": 0, "danger": 0}

        for i, test_name in enumerate(unique_tests, 1):
            self._print_progress(i, len(unique_tests), "生成测试报告")

            # Rows belonging to this test only; copy so preprocessing can
            # add helper columns without touching filtered_df
            test_data = filtered_df[filtered_df["Test Name New"] == test_name].copy()
            test_data = self._preprocess_test_data(test_data)

            if test_data.empty:
                continue

            # Limit values used for reference lines and status evaluation
            lower_plot, upper_plot, _, _ = self._extract_limits(test_data)

            # Descriptive statistics
            y_data = test_data['Measurement_num']
            stats = self._calculate_statistics(y_data)
            total_points += stats['count']

            # Summary chart (all SNs overlaid)
            summary_plot_image = self._create_summary_plot(test_data, test_name, lower_plot, upper_plot)

            # One independent chart per SN
            sn_plot_images = self._create_sn_plots(test_data, test_name, lower_plot, upper_plot)

            # success / warning / danger classification
            status_info = self._determine_test_status(stats, lower_plot, upper_plot)
            status_counts[status_info["status"]] += 1

            # Collect everything the template needs for this test
            test_results.append({
                "name": test_name,
                "stats": stats,
                "limits": {"lower": lower_plot, "upper": upper_plot},
                "summary_plot_image": summary_plot_image,
                "sn_plot_images": sn_plot_images,
                "status": status_info["status"],
                "status_display": status_info["status_display"]
            })

        # Render the Jinja2 template
        template = Template(HTML_TEMPLATE)
        html_content = template.render(
            keyword=keyword,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            test_count=len(test_results),
            total_points=total_points,
            tests=test_results,
            file_path=self.file_path,
            analysis_time=round(time.time() - start_time, 2),
            status_counts={"normal": status_counts["success"], "warning": status_counts["warning"],
                           "abnormal": status_counts["danger"]}
        )

        # Persist the report
        with open(self.html_report_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        print(f"\nHTML报告已生成: {self.html_report_path}")
        print(f"共处理 {len(test_results)} 个测试项,{total_points} 个数据点")
|
||||
|
||||
def run(self) -> None:
|
||||
"""运行主程序"""
|
||||
try:
|
||||
self.get_file_path()
|
||||
self.load_data()
|
||||
while True:
|
||||
filtered_df, keyword, unique_tests = self.get_keyword()
|
||||
self.create_output_dir(keyword)
|
||||
self.generate_html_report(filtered_df, keyword, unique_tests)
|
||||
print(f"\n✅ 分析完成!")
|
||||
# print(f"📊 报告文件: {self.html_report_path}")
|
||||
# print(f"📁 输出目录: {self.output_dir}")
|
||||
except KeyboardInterrupt:
|
||||
print(f"\n{Fore.YELLOW}⚠ 用户中断程序")
|
||||
except Exception as e:
|
||||
print(f"\n❌ 发生错误: {type(e).__name__}: {str(e)}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point: run the interactive analysis pipeline
if __name__ == "__main__":
    plotter = TestReportScatterPlotter()
    plotter.run()
|
||||
Reference in New Issue
Block a user