代码和代码结构更新

This commit is contained in:
2026-02-05 09:04:10 +08:00
parent 5c846eae94
commit 46ae47274d
9 changed files with 4742 additions and 1 deletions

View File

@@ -14,4 +14,4 @@ htmlReportProcess_Merge_picHtml_V2.py
htmlReportProcess_Merge_pic_V2.py
/htmlReportProcess*/
#/htmlReportProcess*/

View File

@@ -0,0 +1,30 @@
/build/*
/build
/dist/*
/dist
/source/*
/source
htmlReportProcess_Merge_picHtml_V3.py
htmlReportProcess_Merge_picHtml_V2.py
htmlReportProcess_Merge_pic_V2.py
#/htmlReportProcess*/
htmlReportProcess_cmd_pV2.py
htmlReportProcess_cmd_pV3.py
htmlReportProcess_cmd_V2.py
htmlReportProcess.py
htmlReportProcess_Merge_cmd_V2.py
htmlReportProcess_Merge.py

View File

@@ -0,0 +1,434 @@
import os
import sys
import re
import time
import tkinter as tk
from tkinter import filedialog
from openpyxl import Workbook, load_workbook
from datetime import datetime
from colorama import Fore, Style, init
class TestReportMerger:
    """Merge the 'All Tests' worksheet of every .xlsx report in a folder.

    The workflow (see ``run``) is: ask for a directory, collect its ``.xlsx``
    files, read the 'All Tests' sheet of each one, append four derived columns
    (SN / TestCycleTime / Cell / source workbook name) to every row and write
    the combined table to a new timestamped workbook in the same directory.
    """

    def __init__(self):
        self.source_files = []
        self.merged_data = []
        self.output_filepath = ""
        self.selected_folder = ""
        # Progress statistics, printed by _print_summary().
        self.stats = {
            "total_files": 0,
            "processed_files": 0,
            "skipped_no_sheet": 0,
            "errors": 0,
            "total_rows_merged": 0
        }

    def _print_stage(self, msg):
        """Print a banner announcing a new processing stage."""
        print(f"\n=== {msg} ===")

    def _print_progress(self, current, total, prefix="处理进度"):
        """Redraw a single-line text progress bar (carriage return, no newline)."""
        percent = (current / total * 100) if total else 0
        bar_len = 30
        filled = int(bar_len * current / total) if total else 0
        # Clamp so an out-of-range `current` can never distort the bar width.
        filled = max(0, min(bar_len, filled))
        # The fill glyph was an empty string, which made the bar shrink
        # instead of fill as progress advanced.
        bar = "█" * filled + "-" * (bar_len - filled)
        print(f"\r{prefix}: [{bar}] {current}/{total} ({percent:.1f}%)", end="", flush=True)

    def _print_summary(self):
        """Print the per-run counters collected in ``self.stats``."""
        self._print_stage("处理摘要")
        print(f"总文件数: {self.stats['total_files']}")
        print(f"成功处理: {self.stats['processed_files']}")
        print(f"跳过(无工作表): {self.stats['skipped_no_sheet']}")
        print(f"错误文件: {self.stats['errors']}")
        print(f"合并总行数: {self.stats['total_rows_merged']}")

    def _new_output_path(self):
        """Build a timestamped output path inside the selected folder and store it."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"测试报告合并_{timestamp}.xlsx"
        self.output_filepath = os.path.join(self.selected_folder, output_filename)
        return self.output_filepath

    def select_directory(self):
        """Ask for the report directory with a Tk folder dialog.

        Returns the chosen path (empty string when the dialog is cancelled).
        """
        root = tk.Tk()
        root.withdraw()  # hide the empty main window
        try:
            self.selected_folder = filedialog.askdirectory(title="选择包含测试报告的目录")
        finally:
            # Release the hidden Tk root; it used to be leaked.
            root.destroy()
        return self.selected_folder

    def _get_directory_from_console(self):
        """Prompt on the console until an existing directory path is entered."""
        while True:
            print(f"\n{Fore.CYAN}=== HTML-excel报告处理程序 ===")
            print(f"{Fore.WHITE}请输入包含测试报告文件的目录路径:")
            self.selected_folder = input("> ").strip()
            if not self.selected_folder:
                print(f"{Fore.YELLOW}⚠ 路径不能为空,请重新输入")
                continue
            # Drop quotes that get pasted along with a copied path.
            self.selected_folder = self.selected_folder.strip('"\'')
            if not os.path.exists(self.selected_folder):
                print(f"{Fore.RED}❌ 路径不存在,请重新输入")
                continue
            if not os.path.isdir(self.selected_folder):
                print(f"{Fore.RED}❌ 请输入目录路径,而不是文件路径")
                continue
            return self.selected_folder

    def scan_files(self):
        """Collect the .xlsx files directly inside the selected folder.

        Names starting with ``~$`` (Excel lock/temp files) are ignored.
        Returns True when at least one file was found.
        """
        if not self.selected_folder:
            return False
        self._print_stage("扫描文件")
        # Sorted so the merge order does not depend on the OS listing order.
        self.source_files = [
            os.path.join(self.selected_folder, name)
            for name in sorted(os.listdir(self.selected_folder))
            if name.lower().endswith(".xlsx") and not name.startswith("~$")
        ]
        self.stats["total_files"] = len(self.source_files)
        if self.stats["total_files"] == 0:
            print("指定目录中没有找到可处理的 .xlsx 文件")
            return False
        print(f"找到 {self.stats['total_files']} 个 .xlsx 文件")
        return True

    @staticmethod
    def _parse_source_filename(value):
        """Split a 'Source File name' value into (SN, TestCycleTime, Cell).

        Expected shape: ``<SN>(<date time>)-<cell>.html``, for example
        ``F27140001X3M00004013683JK00190(2025-09-22 09-03-22)-14.html``.
        Parts that cannot be found are returned as empty strings.
        """
        sn, ts, cell = "", "", ""
        if not value:
            return sn, ts, cell
        # Work on the base name only, in case the value carries a path.
        base = os.path.basename(str(value)).strip()
        # Primary pattern: SN(time)-Cell[.ext]
        m = re.match(r'^(?P<sn>[^()\-]+)\((?P<time>[^)]+)\)-(?P<cell>\d+)', base)
        if m:
            return m.group('sn').strip(), m.group('time').strip(), m.group('cell').strip()
        # Fallback: pick each part up independently.
        sn_match = re.search(r'\bF[A-Z0-9]+\b', base)
        time_match = re.search(r'\(([^)]+)\)', base)
        cell_match = re.search(r'-(\d+)(?:\.\w+)?$', base)
        if sn_match:
            sn = sn_match.group(0).strip()
        if time_match:
            ts = time_match.group(1).strip()
        if cell_match:
            cell = cell_match.group(1).strip()
        return sn, ts, cell

    @staticmethod
    def _find_source_column(header):
        """Return the index of the 'Source File name' column (case-insensitive) or None."""
        for i, h in enumerate(header):
            if h and str(h).strip().lower() == "source file name":
                return i
        return None

    def merge_reports(self):
        """Merge the 'All Tests' sheet of every scanned workbook into ``merged_data``.

        The first non-empty header row encountered becomes the merged header,
        extended with SN / TestCycleTime / Cell / source-workbook columns.
        Returns True when at least one data row was merged.
        """
        if not self.source_files:
            return False
        self._print_stage("合并报告数据")
        start_time = time.time()
        self.merged_data = []
        header_added = False
        total_files = self.stats["total_files"]
        for idx, file_path in enumerate(self.source_files, start=1):
            filename = os.path.basename(file_path)
            self._print_progress(idx, total_files, prefix="文件处理")
            wb = None
            try:
                wb = load_workbook(file_path, read_only=True, data_only=True)
                if 'All Tests' not in wb.sheetnames:
                    self.stats["skipped_no_sheet"] += 1
                    print(f"\n文件 {filename} 中没有 'All Tests' 工作表,已跳过")
                    continue
                sheet = wb['All Tests']
                # max_row can be None in read-only mode when the file carries
                # no dimension record; never compare it to an int directly.
                data_rows_count = max((sheet.max_row or 0) - 1, 0)
                row_iter = sheet.iter_rows(values_only=True)
                first_row = next(row_iter, None)
                header = list(first_row) if first_row is not None else []
                # Resolved per workbook so a different column order in a later
                # file does not make the parser read the wrong column.
                source_col_idx = self._find_source_column(header)
                if not header_added and first_row is not None:
                    self.merged_data.append(header + ["SN", "TestCycleTime", "Cell", "数据来源"])
                    header_added = True
                added_rows = 0
                for row in row_iter:
                    # Skip missing and completely empty rows.
                    if row is None or all(cell is None for cell in row):
                        continue
                    row_list = list(row)
                    sn, ts, cell = "", "", ""
                    if source_col_idx is not None and source_col_idx < len(row_list):
                        sn, ts, cell = self._parse_source_filename(row_list[source_col_idx])
                    row_list += [sn, ts, cell, filename]
                    self.merged_data.append(row_list)
                    added_rows += 1
                self.stats["processed_files"] += 1
                self.stats["total_rows_merged"] += added_rows
                print(f"\n→ 已处理: {filename} | 预估行数: {data_rows_count} | 实际合并行数: {added_rows} | 累计合并行数: {self.stats['total_rows_merged']}")
            except Exception as e:
                # Best effort: one unreadable workbook must not abort the run.
                self.stats["errors"] += 1
                print(f"\n处理文件 {filename} 时出错: {type(e).__name__}: {str(e)}")
            finally:
                # Read-only workbooks keep the file handle open until closed.
                if wb is not None:
                    wb.close()
        elapsed = time.time() - start_time
        print(f"\n合并阶段完成,耗时: {elapsed:.1f}")
        return len(self.merged_data) > 1  # header plus at least one data row

    def save_merged_report(self):
        """Write ``merged_data`` with openpyxl. Returns True on success."""
        if not self.merged_data or not self.selected_folder:
            return False
        self._print_stage("保存合并结果")
        self._new_output_path()
        try:
            wb = Workbook()
            ws = wb.active
            ws.title = "Merged All Tests"
            total_rows = len(self.merged_data)
            last_print = time.time()
            for i, row in enumerate(self.merged_data, start=1):
                ws.append(row)
                # Throttle console output to roughly twice per second.
                if i == total_rows or (time.time() - last_print) > 0.5:
                    self._print_progress(i, total_rows, prefix="写入Excel行")
                    last_print = time.time()
            wb.save(self.output_filepath)
            print(f"\n文件已保存: {self.output_filepath}")
            return True
        except Exception as e:
            print(f"保存合并报告时出错: {type(e).__name__}: {str(e)}")
            return False

    def save_merged_report_xlsxwriter(self):
        """Write ``merged_data`` with xlsxwriter, printing a percentage progress line.

        Falls back to ``save_merged_report`` when xlsxwriter is not installed.
        Returns True on success, False on any write error.
        """
        if not self.merged_data or not self.selected_folder:
            return False
        self._print_stage("保存合并结果")
        self._new_output_path()
        try:
            # Only xlsxwriter is needed here; the former unused pandas import
            # forced the fallback whenever pandas was missing.
            import xlsxwriter
        except ImportError:
            print("xlsxwriter未安装使用备选方案")
            return self.save_merged_report()
        headers = self.merged_data[0]
        data_rows = self.merged_data[1:]
        total_rows = len(data_rows)
        print(f"开始保存,共{total_rows}行数据到工作表 'Merged All Tests'...")
        try:
            workbook = xlsxwriter.Workbook(self.output_filepath)
            worksheet = workbook.add_worksheet('Merged All Tests')
            header_format = workbook.add_format({
                'bold': True,
                'fg_color': '#D7E4BC',
                'border': 1
            })
            for col_num, header in enumerate(headers):
                worksheet.write(0, col_num, header, header_format)
            # Report progress about twenty times over the whole write.
            step = max(1, total_rows // 20)
            for row_num, row_data in enumerate(data_rows, start=1):
                for col_num, cell_value in enumerate(row_data):
                    worksheet.write(row_num, col_num, cell_value)
                if row_num % step == 0 or row_num == total_rows:
                    percentage = int((row_num / total_rows) * 100)
                    print(f"\r保存进度: {percentage}% ({row_num}/{total_rows}行)", end="", flush=True)
            # Fixed default width for every column.
            for idx, _ in enumerate(headers):
                worksheet.set_column(idx, idx, 15)
            workbook.close()
        except Exception as e:
            # Same contract as save_merged_report(): report and return False.
            print(f"\n保存合并报告时出错: {type(e).__name__}: {str(e)}")
            return False
        print("\r保存完成!" + " " * 40)  # overwrite the progress line
        print(f"文件已保存: {self.output_filepath}")
        print(f"工作表名: Merged All Tests")
        return True

    def save_merged_report_xlsxwriter_with_progress(self):
        """Write ``merged_data`` with xlsxwriter, using tqdm when it is available.

        tqdm is optional: without it a plain percentage line is printed.
        Falls back to ``save_merged_report`` when xlsxwriter is not installed.
        Returns True on success, False on any write error.
        """
        if not self.merged_data or not self.selected_folder:
            return False
        self._print_stage("保存合并结果")
        self._new_output_path()
        try:
            import xlsxwriter
        except ImportError:
            print("xlsxwriter 未安装,使用备选方案")
            return self.save_merged_report()
        try:
            # Optional dependency; the previous unconditional import made the
            # plain-text progress branch unreachable.
            from tqdm import tqdm
        except ImportError:
            tqdm = None
        headers = self.merged_data[0]
        data_rows = self.merged_data[1:]
        total_rows = len(data_rows)
        # A separator between the column and row counts was missing.
        print(f"开始保存,共{len(headers)}列 {total_rows}行数据...")
        pbar = None
        try:
            workbook = xlsxwriter.Workbook(self.output_filepath)
            worksheet = workbook.add_worksheet('Merged All Tests')
            for col_num, header in enumerate(headers):
                worksheet.write(0, col_num, header)
            if tqdm is not None:
                pbar = tqdm(total=total_rows, desc="保存进度", unit="")
            step = max(1, total_rows // 10)
            for row_num, row_data in enumerate(data_rows, start=1):
                for col_num, cell_value in enumerate(row_data):
                    worksheet.write(row_num, col_num, cell_value)
                # `is not None`, because a tqdm bar with total == 0 is falsy.
                if pbar is not None:
                    pbar.update(1)
                elif row_num % step == 0 or row_num == total_rows:
                    percentage = int((row_num / total_rows) * 100)
                    print(f"\r保存进度: {percentage}% ({row_num}/{total_rows}行)", end="", flush=True)
            if pbar is None:
                print("\r保存完成!" + " " * 30)  # overwrite the progress line
            workbook.close()
        except Exception as e:
            print(f"\n保存合并报告时出错: {type(e).__name__}: {str(e)}")
            return False
        finally:
            if pbar is not None:
                pbar.close()
        print(f"文件已保存: {self.output_filepath}")
        return True

    def run(self):
        """Run the whole flow: ask for directory, scan, merge, save, summarise."""
        print("=== 测试报告合并工具 ===")
        # 1. Choose the directory (console prompt; select_directory() offers a GUI dialog).
        source_dir = self._get_directory_from_console()
        if not source_dir:
            print(f"{Fore.RED}❌ 未选择目录,程序退出")
            return
        # 2. Scan for workbooks.
        if not self.scan_files():
            print("指定目录中没有找到Excel文件")
            return
        print(f"准备处理 {len(self.source_files)} 个文件...")
        # 3. Merge.
        if not self.merge_reports():
            print("没有找到包含 'All Tests' 工作表的文件或合并数据为空")
            self._print_summary()
            return
        # 4. Save.
        if self.save_merged_report_xlsxwriter_with_progress():
            print("合并完成!")
        else:
            print("保存合并报告时出错")
        self._print_summary()
if __name__ == "__main__":
    # Initialise colorama before anything is printed: `init` is imported but
    # was never called, so colours set by Fore.* leaked into later lines and
    # ANSI codes could show up raw on legacy Windows consoles.
    init(autoreset=True)
    merger = TestReportMerger()
    merger.run()

View File

@@ -0,0 +1,26 @@
/build/*
/build
/dist/*
/dist
/source/*
/source
htmlReportProcess_Merge_picHtml_V3.py
htmlReportProcess_Merge_picHtml_V2.py
htmlReportProcess_Merge_pic_V2.py
#/htmlReportProcess*/
htmlReportProcess_cmd_pV2.py
htmlReportProcess_cmd_pV3.py
htmlReportProcess_cmd_V2.py
htmlReportProcess.py

View File

@@ -0,0 +1,620 @@
import pandas as pd
from bs4 import BeautifulSoup
import os
import re
import sys
from datetime import datetime
from colorama import Fore, Style, init
from concurrent.futures import ThreadPoolExecutor, as_completed
init(autoreset=True)
class ProgressTracker:
    """Single-line terminal progress display (count, bar, elapsed time, rate)."""

    def __init__(self):
        self.processed = 0
        self.total = 0
        self.start_time = datetime.now()

    def begin(self, total_files, sn_file_counts=None):
        """Start tracking ``total_files`` tasks and print the opening banner.

        Args:
            total_files: number of tasks that will be reported via ``update``.
            sn_file_counts: optional mapping SN -> number of HTML files; when
                given, the distribution is printed below the banner.
        """
        self.total = total_files
        # Reset so one tracker can be reused for a second run.
        self.processed = 0
        self.start_time = datetime.now()
        dist_info = [
            f"{sn[:31]}: {count}个 html文件。"
            for sn, count in (sn_file_counts or {}).items()
        ]
        stats_line = f"{Fore.CYAN}▶ 开始处理 {self.total} 个任务"
        print(f"\n{stats_line.ljust(80)}")
        if dist_info:
            # Joined outside the f-string: a backslash inside an f-string
            # expression is a SyntaxError before Python 3.12.
            joined = "\n".join(dist_info)
            dist_line = f"{Fore.MAGENTA}⚫ SN文件分布:\n{joined}"
            print(f"{dist_line.ljust(580)}{Style.RESET_ALL}")

    @staticmethod
    def _render_bar(percent):
        """Return a 20-slot bar for ``percent`` (one slot per 5 %)."""
        filled = max(0, min(20, int(percent / 5)))
        # The fill glyph was an empty string, so the bar never showed progress.
        return "█" * filled + " " * (20 - filled)

    @staticmethod
    def _rate(processed, seconds):
        """Tasks per second; 0.0 while no measurable time has elapsed."""
        return processed / seconds if seconds > 0 else 0.0

    def update(self, success=True, prefix=''):
        """Count one finished task and redraw the progress line in place."""
        self.processed += 1
        elapsed = datetime.now() - self.start_time
        time_used = self._format_timedelta(elapsed)
        # Guard against a tracker that was started with zero tasks.
        percent = (self.processed / self.total * 100) if self.total else 100.0
        status_icon = f"{Fore.GREEN}" if success else f"{Fore.RED}"
        status_text = f"{status_icon} {self.processed}/{self.total} [{(percent / 5):.0f}|{self._render_bar(percent)}|]"
        sys_info = [
            f"{prefix}{status_text.ljust(40)}",
            f"进度: {percent:.1f}%".ljust(15),
            f"耗时: {time_used}".ljust(15),
            # The first update can land within the clock resolution of
            # start_time, which used to raise ZeroDivisionError.
            f"速度: {self._rate(self.processed, elapsed.total_seconds()):.1f} 任务/秒"
        ]
        # ESC[2K clears the current line before it is rewritten.
        print('\x1b[2K\r' + ''.join(sys_info), end='', flush=True)

    def end(self, prefix=''):
        """Print the completion line with the total elapsed time."""
        print(f"\n{Fore.GREEN}{prefix}处理完成! 总耗时: {(datetime.now() - self.start_time).total_seconds():.1f}\n")

    def _format_timedelta(self, delta):
        """Format a timedelta as HH:MM:SS."""
        seconds = delta.total_seconds()
        return f"{int(seconds // 3600):02}:{int((seconds % 3600) // 60):02}:{int(seconds % 60):02}"
class HTMLReportProcessor:
    """Parse HTML test reports in parallel and group their rows by serial number.

    Worker threads only parse and return plain results; all shared
    dictionaries are filled in by the calling thread in ``process_files``.
    """

    # Serial number: 'F' + 15 alphanumerics, optionally followed by 5 or more.
    # Shared by the HTML-based and the filename-based extraction.
    _SN_PATTERN = re.compile(r'\b(F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?)\b', flags=re.I)

    def __init__(self):
        # SN -> {'headers': [...], 'data': [[...], ...]}
        self.sn_data_map = {}
        self.progress = ProgressTracker()
        # SN -> set of source file names
        self.sn_source_files = {}
        # SN -> number of FAIL rows counted while parsing
        self.sn_fail_counts = {}
        # SN (taken from the file name) -> number of HTML files
        self.sn_file_counts = {}

    @staticmethod
    def _clean_test_name(raw_name):
        """Strip a leading ``Round<n>_<m>_`` prefix from a test name."""
        return re.sub(r'^Round\d+_\d+_', '', raw_name)

    def _extract_sn(self, soup, filename):
        """Return the serial number of a report.

        Looks in the 'Serial Number:' <h3> (or, when there is no such tag, in
        the whole document text) and falls back to the file name when the HTML
        yields nothing. Previously the file-name fallback was unreachable
        whenever a soup was supplied.

        Returns "UNKNOWN_SN" when nothing matches and "ERROR_SN" on an
        unexpected exception.
        """
        try:
            if soup is not None:
                sn_tag = soup.find('h3', string=re.compile(r'Serial Number:', re.I))
                text = sn_tag.get_text() if sn_tag else soup.get_text(" ", strip=True)
                content_match = self._SN_PATTERN.search(text)
                if content_match:
                    return content_match.group(1)
            return self._extract_sn_from_filename(filename)
        except Exception as e:
            print(f"SN提取失败: {filename} - {str(e)}")
            return "ERROR_SN"

    def process_files(self, source_dir):
        """Parse every HTML file under ``source_dir`` and return ``sn_data_map``."""
        all_files = self._scan_files(source_dir)
        # Pre-scan file names so the banner can show the per-SN distribution.
        self._collect_sn_distribution(all_files)
        self.progress.begin(len(all_files), self.sn_file_counts)
        max_workers = self._calc_max_workers(env_var="OVERRIDE_WORKERS")
        print(f"{Fore.CYAN}▶ 使用线程并发数(HTML解析): {max_workers}")
        results = []
        errors = []
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(self._process_single_file, f) for f in all_files]
            for future in as_completed(futures):
                try:
                    res = future.result()
                except Exception as e:
                    # _process_single_file catches its own errors; this is a last resort.
                    res = {'success': False, 'error': f"未知异常: {type(e).__name__}: {e}"}
                self.progress.update(res.get('success', False), prefix='HTML解析: ')
                if res.get('success'):
                    results.append(res)
                else:
                    errors.append(res.get('error'))
        self.progress.end(prefix='HTML解析')
        # List at most 50 failures to keep the console readable.
        if errors:
            print(f"\n{Fore.RED}✗ 以下文件处理失败(共 {len(errors)} 个):")
            for err in errors[:50]:
                print(f" - {err}")
            if len(errors) > 50:
                print(f" ... 其余 {len(errors) - 50} 条省略")
        # Merge worker results into the shared structures (caller thread only).
        for res in results:
            sn = res['sn']
            self._store_data(sn, res['headers'], res['rows'])
            self.sn_source_files.setdefault(sn, set()).add(res['filename'])
            self.sn_fail_counts[sn] = self.sn_fail_counts.get(sn, 0) + res['file_fail_count']
        self._add_report_statistics()
        return self.sn_data_map

    def _calc_max_workers(self, env_var="OVERRIDE_WORKERS"):
        """Return the thread count: the env override if numeric, else 2x CPUs within [4, 32]."""
        override = os.getenv(env_var)
        if override and override.isdigit():
            return max(1, int(override))
        cpu = os.cpu_count() or 2
        return max(4, min(32, cpu * 2))

    def _collect_sn_distribution(self, file_list):
        """Count files per SN using file names only (file contents are not read)."""
        print(f"{Fore.YELLOW}⌛ 正在扫描文件分布...")
        for file_path in file_list:
            filename = os.path.basename(file_path)
            try:
                sn = self._extract_sn_from_filename(filename)
                self.sn_file_counts[sn] = self.sn_file_counts.get(sn, 0) + 1
            except Exception as e:
                # Best effort: a bad name must not stop the scan.
                print(f"\n{Fore.RED}⚠ 处理失败: {filename} - :{str(e)}")
        print(f"{Fore.GREEN}✔ SN分布扫描完成")

    def _extract_sn_from_filename(self, filename):
        """Return the SN found in ``filename`` or "UNKNOWN_SN"."""
        content_match = self._SN_PATTERN.search(filename)
        return content_match.group(1) if content_match else "UNKNOWN_SN"

    def _add_report_statistics(self):
        """Attach source-file and FAIL counts to every SN entry as 'report_stats'."""
        for sn, data_info in self.sn_data_map.items():
            data_info['report_stats'] = {
                'source_files_count': len(self.sn_source_files.get(sn, [])),
                # Parse-time FAIL count stored alongside the data.
                'fail_count': self.sn_fail_counts.get(sn, 0)
            }

    def _scan_files(self, source_dir):
        """Recursively collect .html / .htm files under ``source_dir``."""
        all_files = []
        for root_dir, _, files in os.walk(source_dir):
            all_files.extend(
                os.path.join(root_dir, f) for f in files if f.lower().endswith(('.html', '.htm'))
            )
        return all_files

    def _process_single_file(self, file_path):
        """Parse one HTML report and return a result dict; never raises.

        On success the dict carries 'sn', 'headers', 'rows', 'file_fail_count'
        and 'filename'; on failure it carries 'error'. No instance state is
        modified here.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
            soup = BeautifulSoup(html_content, 'html.parser')
            filename = os.path.basename(file_path)
            sn = self._extract_sn(soup, filename)
            table = soup.find('table', border=1) or soup.find('table')
            if not table:
                raise ValueError("未找到有效数据表格")
            headers, rows, file_fail_count = self._process_table(table, sn, filename, html_content)
            return {
                'success': True,
                'sn': sn,
                'headers': headers,
                'rows': rows,
                'file_fail_count': file_fail_count,
                'filename': filename
            }
        except Exception as e:
            return {
                'success': False,
                'error': f"{os.path.basename(file_path)} - {type(e).__name__}: {str(e)}"
            }

    def _find_status_index(self, headers):
        """Return the index of the first header that looks like a status/result column."""
        if not headers:
            return None
        candidates = ('status', 'result', 'test status')
        for idx, h in enumerate(headers):
            h_norm = str(h).strip().lower()
            if h_norm in candidates or re.search(r'status|result', h_norm, flags=re.I):
                return idx
        return None

    def _process_table(self, table, sn, filename, html_content):
        """Convert the report table into (headers, rows, file_fail_count).

        Output headers are the first 11 table headers plus 'Test Name New'
        (inserted right after the test-name column), 'Test Time' and
        'Source File name'. Every row is padded or truncated to the table
        header width first, so the extra columns always line up.

        Raises:
            ValueError: when the table has no header row or no header cells.
        """
        # Header row: grey background row, else first row with <th>, else first row.
        header_tr = table.find('tr', bgcolor='#eeeeee')
        if not header_tr:
            header_tr = next((tr for tr in table.find_all('tr') if tr.find('th')), None)
        if not header_tr:
            header_tr = table.find('tr')
        if not header_tr:
            raise ValueError("未找到有效数据表格")
        headers = [th.get_text(strip=True) for th in header_tr.find_all(['th', 'td'])]
        if len(headers) > 11:
            headers = headers[:11]
        if not headers:
            raise ValueError("未找到有效数据表格")
        raw_width = len(headers)

        # Must be resolved BEFORE 'Test Name New' is inserted: it is applied to
        # raw row cells, and doing it afterwards shifted it one column right.
        status_col_idx = self._find_status_index(headers)

        try:
            test_name_idx = headers.index('Test Name')
        except ValueError:
            # No exact 'Test Name' header: fuzzy match, default to column 1.
            test_name_idx = next((i for i, h in enumerate(headers) if re.search(r'test\s*name', h, flags=re.I)), 1)
        test_name_idx = min(test_name_idx, raw_width - 1)
        headers.insert(test_name_idx + 1, 'Test Name New')
        headers.append('Test Time')
        headers.append('Source File name')

        # Time base: 0 until a Start Time header or a Test_Time row provides one.
        time_base = 0.0
        file_fail_count = 0
        start_time_match = re.search(r"Start Time:\s*(.+?)(?:\s*<|$)", html_content, re.IGNORECASE)
        if start_time_match:
            # Remove any residual HTML tags from the captured text.
            start_time_str = re.sub(r'<[^>]+>', '', start_time_match.group(1).strip()).strip()
            try:
                dt = datetime.strptime(start_time_str, "%A, %B %d, %Y %I:%M:%S %p")
                time_base = dt.timestamp()
                print(f"{Fore.GREEN}✔ 使用Start Time作为时间基准: {start_time_str} -> {time_base}")
            except Exception as e:
                print(f"{Fore.RED}⚠ 解析Start Time失败: {start_time_str} - {e}")
                try:
                    # Second attempt without the leading weekday.
                    dt = datetime.strptime(start_time_str.split(', ', 1)[1], "%B %d, %Y %I:%M:%S %p")
                    time_base = dt.timestamp()
                    print(f"{Fore.GREEN}✔ 使用简化格式Start Time作为时间基准: {start_time_str} -> {time_base}")
                except Exception as e2:
                    print(f"{Fore.RED}⚠ 二次解析Start Time失败: {start_time_str} - {e2}")

        rows = []
        all_trs = table.find_all('tr')
        # Skip the first two rows when there are at least three, else one.
        start_index = 2 if len(all_trs) >= 3 else 1
        for row in all_trs[start_index:]:
            cols = [td.get_text(strip=True) for td in row.find_all(['td', 'th'])]
            if len(cols) < 2:
                continue
            original_test_name = cols[1].strip()
            if not original_test_name:
                # Uses the `filename` argument; the old `self.currentFilename`
                # does not exist and failed the whole file.
                print(f"\rFile{Fore.RED}{filename} 存在空的 Test name")
            if status_col_idx is not None and len(cols) > status_col_idx:
                if 'FAIL' in cols[status_col_idx].strip().upper():
                    file_fail_count += 1
            elapsed_time_str = cols[9].strip() if len(cols) > 9 else "0"
            # A Test_Time row re-anchors the time base: measured time minus elapsed.
            if original_test_name == "Test_Time":
                measurement_str = cols[7] if len(cols) > 7 else ""
                try:
                    dt = datetime.strptime(measurement_str, "%m/%d/%Y %I:%M:%S %p")
                    time_base = dt.timestamp() - float(elapsed_time_str)
                    print(f"{Fore.GREEN}✔ 更新时间基准为Test_Time: {measurement_str} -> {time_base}")
                except Exception as e:
                    # Keep the previous base when the row cannot be parsed.
                    print(f"{Fore.RED}⚠ 解析Test_Time失败: {measurement_str} - {e}")
            try:
                elapsed_append = time_base + float(elapsed_time_str)
            except ValueError:
                elapsed_append = time_base
            # Normalise to the header width so the appended columns line up and
            # the later DataFrame construction cannot fail on ragged rows.
            if len(cols) < raw_width:
                cols.extend([''] * (raw_width - len(cols)))
            elif len(cols) > raw_width:
                del cols[raw_width:]
            cols.insert(test_name_idx + 1, self._clean_test_name(cols[test_name_idx]))
            cols.append(elapsed_append)
            cols.append(filename)
            rows.append(cols)
        return headers, rows, file_fail_count

    def _store_data(self, sn, headers, rows):
        """Append ``rows`` to the entry of ``sn``, creating it with ``headers`` if needed."""
        if sn not in self.sn_data_map:
            self.sn_data_map[sn] = {'headers': headers, 'data': []}
        self.sn_data_map[sn]['data'].extend(rows)
class ExcelReportGenerator:
    """Write one Excel workbook per serial number, using a thread pool."""

    def __init__(self, output_dir, max_workers=None):
        self.output_dir = output_dir
        self.progress = ProgressTracker()
        self.max_workers = max_workers or self._calc_max_workers(env_var="EXCEL_WORKERS")

    def generate_reports(self, sn_data_map):
        """Generate a workbook for every entry of ``sn_data_map`` and print a summary.

        Args:
            sn_data_map: mapping SN -> dict with 'headers', 'data' and
                optionally 'report_stats'.
        """
        total_reports = len(sn_data_map)
        errors = []
        successes = []
        # The opening bracket before the count was missing in this message.
        print(f"\n{Fore.CYAN}▶ 开始并行生成Excel报告({total_reports}个),线程并发数: {self.max_workers}")
        self.progress.begin(total_reports)
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self._generate_one_report, sn, data_info)
                for sn, data_info in sn_data_map.items()
            ]
            for future in as_completed(futures):
                try:
                    res = future.result()
                except Exception as e:
                    # _generate_one_report catches its own errors; last resort.
                    res = {'success': False, 'error': f"未知异常: {type(e).__name__}: {e}"}
                # Progress is only printed from the calling thread.
                self.progress.update(res.get('success', False), prefix='Excel生成: ')
                if res.get('success'):
                    successes.append(res)
                else:
                    errors.append(res.get('error'))
        self.progress.end(prefix='Excel生成')
        # Summaries are capped at 50 lines each.
        for s in successes[:50]:
            print(f"{Fore.GREEN}✓ 生成成功 | 文件: {os.path.basename(s['output_file'])} | SN: {s['sn']} | 记录数: {s['records']} | 来源HTML: {s['source_files_count']} | FAIL总数: {s['fail_count']}")
        if len(successes) > 50:
            print(f"{Fore.GREEN}... 成功列表省略 {len(successes)-50}")
        if errors:
            print(f"\n{Fore.RED}✗ 以下报告生成失败(共 {len(errors)} 个):")
            for err in errors[:50]:
                print(f" - {err}")
            if len(errors) > 50:
                print(f" ... 其余 {len(errors) - 50} 条省略")
        print(f"\n{Fore.CYAN}输出目录: {self.output_dir}")

    def _calc_max_workers(self, env_var="EXCEL_WORKERS"):
        """Return the thread count: the env override if numeric, else 2x CPUs within [2, 16]."""
        override = os.getenv(env_var)
        if override and override.isdigit():
            return max(1, int(override))
        cpu = os.cpu_count() or 2
        return max(2, min(16, cpu * 2))

    def _generate_one_report(self, sn, data_info):
        """Build and save the workbook of one SN; returns a result dict, never raises.

        The file is named ``<sn>_Report.xlsx``, or
        ``<sn>_Report_Fail-item-<n>.xlsx`` when FAIL rows were found.
        """
        try:
            base_name = f"{sn}_Report"
            output_file = os.path.join(self.output_dir, f"{base_name}.xlsx")
            df_all = self._prepare_dataframe(data_info)
            status_col = self._detect_status_column(df_all)
            if status_col:
                # Literal substring test; 'FAIL' is not meant as a regex.
                fail_mask = df_all[status_col].astype(str).str.strip().str.upper().str.contains('FAIL', regex=False)
                df_fail = df_all[fail_mask]
                fail_count = int(fail_mask.sum())
            else:
                df_fail = pd.DataFrame(columns=df_all.columns)
                fail_count = 0
            if fail_count > 0:
                output_file = os.path.join(self.output_dir, f"{base_name}_Fail-item-{fail_count}.xlsx")
            report_stats = data_info.get('report_stats', {})
            source_files_count = report_stats.get('source_files_count', 0)
            self._save_excel(df_all, df_fail, output_file, sn, source_files_count, fail_count)
            return {
                'success': True,
                'sn': sn,
                'output_file': output_file,
                'records': len(df_all),
                'source_files_count': source_files_count,
                'fail_count': fail_count,
            }
        except Exception as e:
            return {
                'success': False,
                'error': f"SN: {sn} - {type(e).__name__}: {str(e)}"
            }

    def _detect_status_column(self, df):
        """Return the first column whose name contains the word 'status' or 'result', else None."""
        for col in df.columns:
            col_str = str(col)
            if re.search(r'\b(status|result)\b', col_str, flags=re.I) or col_str.strip().lower() in (
                    'status', 'result', 'test status'):
                return col
        return None

    def _save_excel(self, df_all, df_fail, output_file, sn, source_files_count, fail_count):
        """Write the 'Report Stats', 'All Tests' and 'FAIL list' sheets to ``output_file``.

        Raises:
            RuntimeError: on any failure, chained to the original exception.
        """
        try:
            with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
                stats_data = {
                    '统计项': ['SN号', '来源HTML文件数', '总FAIL数量', '生成时间'],
                    '': [sn, source_files_count, fail_count, datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
                }
                pd.DataFrame(stats_data).to_excel(writer, sheet_name='Report Stats', index=False)
                df_all.to_excel(writer, sheet_name='All Tests', index=False)
                df_fail.to_excel(writer, sheet_name='FAIL list', index=False)
                # Widen the two columns of the stats sheet.
                workbook = writer.book
                if 'Report Stats' in workbook.sheetnames:
                    worksheet = workbook['Report Stats']
                    worksheet.column_dimensions['A'].width = 20
                    worksheet.column_dimensions['B'].width = 30
        except Exception as e:
            # `from e` keeps the root cause attached to the wrapper.
            raise RuntimeError(f"Excel文件保存失败: {str(e)}") from e

    def _prepare_dataframe(self, data_info):
        """Build a DataFrame from 'data', using 'headers' as the column order."""
        return pd.DataFrame(data_info['data'], columns=data_info['headers'])
class ReportProcessor:
    """Console front end: asks for a folder, parses its HTML reports, writes Excel files."""

    def __init__(self):
        pass

    def process_reports(self):
        """Run the full pipeline for one directory entered on the console."""
        source_dir = self._get_directory_from_console()
        if not source_dir:
            print(f"{Fore.RED}❌ 未选择目录,程序退出")
            return
        output_dir = self._create_output_dir(source_dir)
        self._generate_excel_reports(output_dir, self._process_html_files(source_dir))

    def _get_directory_from_console(self):
        """Keep prompting until the user enters the path of an existing directory."""
        while True:
            print(f"\n{Fore.CYAN}=== HTML报告处理程序 ===")
            print(f"{Fore.WHITE}请输入包含HTML文件的目录路径:")
            entered = input("> ").strip()
            if not entered:
                print(f"{Fore.YELLOW}⚠ 路径不能为空,请重新输入")
                continue
            # Remove quotes that come along with a pasted path.
            candidate = entered.strip('"\'')
            if not os.path.exists(candidate):
                print(f"{Fore.RED}❌ 路径不存在,请重新输入")
            elif not os.path.isdir(candidate):
                print(f"{Fore.RED}❌ 请输入目录路径,而不是文件路径")
            else:
                return candidate

    def _create_output_dir(self, source_dir):
        """Create a timestamped output folder inside ``source_dir`` and return its path."""
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        output_dir = os.path.join(source_dir, f"Html文件分析_{stamp}")
        os.makedirs(output_dir, exist_ok=True)
        print(f"{Fore.GREEN}✔ 输出目录创建成功: {output_dir}")
        return output_dir

    def _process_html_files(self, source_dir):
        """Parse all HTML reports under ``source_dir`` with a fresh HTMLReportProcessor."""
        return HTMLReportProcessor().process_files(source_dir)

    def _generate_excel_reports(self, output_dir, data):
        """Write the Excel reports for ``data`` into ``output_dir``."""
        ExcelReportGenerator(output_dir).generate_reports(data)
if __name__ == "__main__":
    # Script entry point: run the console workflow and report how it ended.
    try:
        ReportProcessor().process_reports()
        print(f"\n{Fore.CYAN}=== 程序执行完成 ===")
    except KeyboardInterrupt:
        # Ctrl+C is a normal way to leave the prompt loop.
        print(f"\n{Fore.YELLOW}⚠ 用户中断程序")
    except Exception as exc:
        import traceback

        print(f"\n{Fore.RED}❌ 程序执行出错: {type(exc).__name__}: {str(exc)}")
        traceback.print_exc()
        # Keep the console window open so the traceback can be read.
        input("traceback 按回车键退出...")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,33 @@
/build/*
/build
/dist/*
/dist
/source/*
/source
htmlReportProcess_Merge_picHtml_V3.py
htmlReportProcess_Merge_picHtml_V2.py
htmlReportProcess_Merge_pic_V2.py
#/htmlReportProcess*/
htmlReportProcess_cmd_pV2.py
htmlReportProcess_cmd_pV3.py
htmlReportProcess_cmd_V2.py
htmlReportProcess.py
htmlReportProcess_Merge_cmd_V2.py
htmlReportProcess_Merge.py
htmlReportProcess_picHtml_1kV2.py
htmlReportProcess_picHtml_2kV2.py

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff