1173 lines
48 KiB
Python
1173 lines
48 KiB
Python
|
|
import pandas as pd
|
|||
|
|
from bs4 import BeautifulSoup
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
import sys
|
|||
|
|
from datetime import datetime
|
|||
|
|
import pytz # 需要安装 pytz 库
|
|||
|
|
|
|||
|
|
from colorama import Fore, Style, init
|
|||
|
|
import multiprocessing as mp
|
|||
|
|
from concurrent.futures import ProcessPoolExecutor
|
|||
|
|
import threading
|
|||
|
|
from collections import defaultdict
|
|||
|
|
|
|||
|
|
# Enable colorama so ANSI color codes render on Windows consoles;
# autoreset clears styling after each print so colors don't leak.
init(autoreset=True)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ThreadSafeProgressTracker:
    """Thread-safe progress tracker for multi-process batch runs.

    All counters are mutated under a single lock; a one-line progress
    display is redrawn in place (carriage-return style) every 10 items
    or when the final item arrives.
    """

    def __init__(self, total_files):
        # One lock guards every mutable counter below.
        self.lock = threading.Lock()
        self.processed = 0
        self.total = total_files
        self.start_time = datetime.now()
        self.success_count = 0
        self.fail_count = 0

    def update(self, success=True, infor='', count=1):
        """Atomically record *count* finished items and maybe redraw.

        The display refreshes only every 10 items (or on completion) to
        keep terminal output cheap.
        """
        with self.lock:
            self.processed += count
            if success:
                self.success_count += count
            else:
                self.fail_count += count

            at_milestone = self.processed % 10 == 0
            at_end = self.processed == self.total
            if at_milestone or at_end:
                self._display_progress(infor)

    def _display_progress(self, infor=''):
        """Render the single-line progress bar on stdout (no newline)."""
        elapsed = datetime.now() - self.start_time
        percent = (self.processed / self.total * 100) if self.total > 0 else 0

        # Throughput in files per second.
        elapsed_secs = elapsed.total_seconds()
        speed = self.processed / elapsed_secs if elapsed_secs > 0 else 0

        filled = int(percent / 5)
        progress_bar = f"[{'▉' * filled}{' ' * (20 - filled)}]"

        segments = [
            f"进度: {self.processed}/{self.total}",
            f"{percent:.1f}% {progress_bar}",
            f"成功: {self.success_count}",
            f"失败: {self.fail_count}",
            f"速度: {speed:.1f} 文件/秒",
            f"耗时: {self._format_timedelta(elapsed)}",
            f"Infor:{infor}",
        ]

        print('\x1b[2K\r' + ' | '.join(segments), end='', flush=True)

    def finish(self, phase_name="处理"):
        """Print the final summary for this phase."""
        total_secs = (datetime.now() - self.start_time).total_seconds()
        print(f"\n{Fore.GREEN}✔ {phase_name}完成! 总耗时: {total_secs:.1f}秒")
        print(f"{Fore.CYAN}成功: {self.success_count}, 失败: {self.fail_count}")

    def _format_timedelta(self, delta):
        """Format a timedelta as zero-padded HH:MM:SS (truncating)."""
        total = int(delta.total_seconds())
        hours, remainder = divmod(total, 3600)
        minutes, seconds = divmod(remainder, 60)
        return f"{hours:02}:{minutes:02}:{seconds:02}"
|||
|
|
|
|||
|
|
class HTMLFileProcessor:
    """HTML test-report file processor (per-file parsing).

    Every method is a staticmethod so the class can be called directly
    from ProcessPoolExecutor workers without pickling instance state.
    """

    @staticmethod
    def _clean_test_name(raw_name):
        """Strip Round/loop decorations from a raw test name."""
        rules = [
            (r'^Round\d+_\d+_', ''),   # drop "RoundX_Y_" prefix
            (r'_loop\d+$', ''),        # drop "_loopN" suffix
            (r'_Round\d+$', ''),       # drop trailing "_RoundN" variants
        ]
        result = raw_name
        for pattern, replacement in rules:
            result = re.sub(pattern, replacement, result)
        return result

    @staticmethod
    def _extract_test_cycle_time(filename):
        """Extract the test-cycle timestamp embedded in *filename*.

        File names contain a block like "(2025-11-21 13-23-16)"; this is
        normalized to "2025-11-21 13:23:16".  Returns "UNKNOWN_TIME" when
        nothing parseable is found.

        Bug fix: the regex match is now performed once up front — the old
        fallback path referenced `time_match` from inside a failed try
        branch, risking an unbound-name error.
        """
        time_match = re.search(r'\((\d{4}-\d{2}-\d{2}\s+\d{2}-\d{2}-\d{2})\)', filename)
        if not time_match:
            return "UNKNOWN_TIME"

        time_str = time_match.group(1)
        try:
            # Preferred: precise parse/format via datetime.
            dt = datetime.strptime(time_str, "%Y-%m-%d %H-%M-%S")
            return dt.strftime("%Y-%m-%d %H:%M:%S")
        except ValueError:
            # Fallback: plain string surgery — only the time portion's
            # separators are swapped, preserving out-of-range digits.
            try:
                date_part, time_part = time_str.split()
                return f"{date_part} {time_part.replace('-', ':')}"
            except ValueError:
                return "UNKNOWN_TIME"

    @staticmethod
    def _extract_sn_and_cell(soup, filename):
        """Extract the serial number and cell index for one report.

        The SN is searched in cascade: the "Serial Number:" <h3> tag, then
        the whole page text, then the filename — stopping at the first hit.
        (Generalized: previously a matching <h3> without an SN inside
        dead-ended at UNKNOWN_SN instead of falling through.)
        The cell index comes from the trailing "-<digits>.html".
        """
        try:
            sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?'
            sn_pattern = rf'\b({sn_regex})\b'

            def _candidate_texts():
                # Lazily yield texts to search, cheapest-first.
                if soup is not None:
                    sn_tag = soup.find('h3', string=re.compile(r'Serial Number:', re.I))
                    if sn_tag:
                        yield sn_tag.get_text()
                    yield soup.get_text(" ", strip=True)
                yield filename

            sn = "UNKNOWN_SN"
            for text in _candidate_texts():
                content_match = re.search(sn_pattern, text, flags=re.I)
                if content_match:
                    sn = content_match.group(1)
                    break

            # Cell index: trailing "-<digits>.html" in the file name.
            cell_match = re.search(r'-(\d+)\.html$', filename)
            cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL"

            return sn, cell

        except Exception as e:
            print(f"{Fore.RED}⚠ SN/CELL提取失败: (unknown) - {str(e)}")
            return "ERROR_SN", "ERROR_CELL"

    @staticmethod
    def _find_status_index(headers):
        """Return the index of the status/result column, or None."""
        if not headers:
            return None
        for idx, header in enumerate(headers):
            normalized = str(header).strip().lower()
            # The regex subsumes the exact names 'status'/'result'/'test status'.
            if re.search(r'status|result', normalized, flags=re.I):
                return idx
        return None

    @staticmethod
    def _count_fail_rows(table, status_col_idx):
        """Count data rows whose status cell contains "FAIL".

        Must run on the raw table, before extra columns are inserted, so
        *status_col_idx* still lines up with the original header.
        """
        if status_col_idx is None:
            return 0

        fail_count = 0
        all_trs = table.find_all('tr')
        # Reports normally carry two header rows; short tables only one.
        start_index = 2 if len(all_trs) >= 3 else 1

        for row in all_trs[start_index:]:
            cols = [td.get_text(strip=True) for td in row.find_all(['td', 'th'])]
            if len(cols) > status_col_idx:
                if 'FAIL' in cols[status_col_idx].strip().upper():
                    fail_count += 1

        return fail_count

    @staticmethod
    def _locate_header_row(table):
        """Find the header <tr>: the grey (#eeeeee) row, else the first row
        containing a <th>, else the table's first row.

        Extracted helper — this lookup was duplicated in
        process_single_file and _process_table_data.
        """
        header_tr = table.find('tr', bgcolor='#eeeeee')
        if not header_tr:
            for tr in table.find_all('tr'):
                if tr.find('th'):
                    header_tr = tr
                    break
        if not header_tr:
            header_tr = table.find('tr')
        return header_tr

    @staticmethod
    def process_single_file(file_path):
        """Parse one HTML report file into headers/rows plus metadata.

        Returns a dict with "success": True and the parsed payload, or
        "success": False with an "error"/"file" pair.  Never raises —
        designed to run inside a worker process.
        """
        # Compute the basename up front so the except handler can always
        # report which file failed (bug fix: a failure in open()/read()
        # previously raised NameError on `filename` inside the handler).
        filename = os.path.basename(file_path)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
            soup = BeautifulSoup(html_content, 'html.parser')

            sn, cell = HTMLFileProcessor._extract_sn_and_cell(soup, filename)
            test_cycle_time = HTMLFileProcessor._extract_test_cycle_time(filename)

            # Guard against helpers returning None.
            if sn is None:
                sn = "UNKNOWN_SN"
            if cell is None:
                cell = "UNKNOWN_CELL"
            if test_cycle_time is None:
                test_cycle_time = "UNKNOWN_TIME"

            table = soup.find('table', border=1) or soup.find('table')
            if not table:
                return {"success": False, "error": "未找到有效数据表格", "file": filename}

            # Identify the status column on the ORIGINAL header layout.
            header_tr = HTMLFileProcessor._locate_header_row(table)
            original_headers = [th.get_text(strip=True) for th in header_tr.find_all(['th', 'td'])]
            if len(original_headers) > 11:
                original_headers = original_headers[:11]

            status_col_idx = HTMLFileProcessor._find_status_index(original_headers)

            # Count FAILs against the raw table, before columns are added.
            file_fail_count = HTMLFileProcessor._count_fail_rows(table, status_col_idx)

            headers, rows = HTMLFileProcessor._process_table_data(
                table, html_content, filename, sn, cell, test_cycle_time, status_col_idx)

            return {
                "success": True,
                "sn": sn,
                "cell": cell,
                "test_cycle_time": test_cycle_time,
                "filename": filename,
                "headers": headers,
                "rows": rows,
                "fail_count": file_fail_count,
            }

        except Exception as e:
            return {
                "success": False,
                "error": f"{type(e).__name__}: {str(e)}",
                "file": filename,
            }

    @staticmethod
    def _process_table_data(table, html_content, filename, sn, cell, test_cycle_time, status_col_idx):
        """Build the output header list and row list for one report table.

        Inserts SN/Cell/TestCycleTime as leading columns, a cleaned
        'Test Name New' column, and appends computed 'Test Time' and
        'Source File name' columns to every row.
        """
        header_tr = HTMLFileProcessor._locate_header_row(table)
        headers = [th.get_text(strip=True) for th in header_tr.find_all(['th', 'td'])]
        if len(headers) > 11:
            headers = headers[:11]

        # New leading columns identifying the source device/run.
        headers.insert(0, 'SN')
        headers.insert(1, 'Cell')
        headers.insert(2, 'TestCycleTime')

        # Locate the "Test Name" column (default 3 = right after the three
        # inserted columns) and add the derived columns.
        test_name_idx = next((i for i, h in enumerate(headers) if re.search(r'test\s*name', h, flags=re.I)), 3)
        if test_name_idx < len(headers):
            headers.insert(test_name_idx + 1, 'Test Name New')
        headers.append('Test Time')
        headers.append('Source File name')

        # Base wall-clock timestamp; per-row elapsed values accumulate on it.
        base_timestamp = HTMLFileProcessor._extract_base_timestamp(html_content, filename)
        global_elapsed_accumulator = base_timestamp if base_timestamp else 0.0

        rows = []

        all_trs = table.find_all('tr')
        start_index = 2 if len(all_trs) >= 3 else 1

        for row in all_trs[start_index:]:
            cols = [td.get_text(strip=True) for td in row.find_all(['td', 'th'])]
            if len(cols) < 2:
                continue

            original_test_name = cols[1].strip()

            # Elapsed seconds come from column 9 when present.
            elapsed_time_str = cols[9].strip() if len(cols) > 9 else "0"
            elapsed_append, global_elapsed_accumulator = HTMLFileProcessor._calculate_timestamp(
                cols, original_test_name, elapsed_time_str, global_elapsed_accumulator)

            # With no report-level start time, re-anchor the accumulator on
            # the first absolute timestamp carried by the rows themselves.
            if elapsed_append is not None and base_timestamp is None:
                try:
                    global_elapsed_accumulator = elapsed_append - float(elapsed_time_str)
                except ValueError:
                    # Bug fix: a non-numeric elapsed cell previously crashed
                    # the whole file here; keep the accumulator unchanged.
                    pass

            # Leading identification columns, mirroring the headers.
            cols.insert(0, sn)
            cols.insert(1, cell)
            cols.insert(2, test_cycle_time)

            # 'Test Name New' sits right after the (shifted) Test Name column.
            adjusted_test_name_idx = test_name_idx + 1
            if adjusted_test_name_idx + 1 < len(cols):
                cols.insert(adjusted_test_name_idx,
                            HTMLFileProcessor._clean_test_name(cols[test_name_idx]))
            cols.append(elapsed_append if elapsed_append is not None else 0.0)
            cols.append(filename)
            rows.append(cols)

        return headers, rows

    @staticmethod
    def _extract_base_timestamp(html_content, filename):
        """Parse the report's "Start Time:" header into a UNIX timestamp.

        Primary format is e.g. "Friday, November 21, 2025 1:23:16 PM"
        (interpreted as UTC); a fallback drops the weekday prefix.
        Returns None when no parseable start time is present.
        """
        start_time_match = re.search(r"Start Time:\s*(.+?)(?:\s*<|$)", html_content, re.IGNORECASE)
        if not start_time_match:
            return None

        # Strip any residual inline tags from the captured value.
        start_time_str = re.sub(r'<[^>]+>', '', start_time_match.group(1)).strip()
        try:
            dt = datetime.strptime(start_time_str, "%A, %B %d, %Y %I:%M:%S %p")
            # Treat the naive report time as UTC.
            dt = pytz.timezone('UTC').localize(dt)
            return dt.timestamp()
        except ValueError:
            try:
                # Fallback: drop the leading weekday ("Friday, ").
                dt = datetime.strptime(start_time_str.split(', ', 1)[1], "%B %d, %Y %I:%M:%S %p")
                return dt.timestamp()
            except (ValueError, IndexError):
                return None

    @staticmethod
    def _calculate_timestamp(cols, test_name, elapsed_time_str, base_accumulator):
        """Return (row_timestamp, new_accumulator).

        "Test_Time" rows carry an absolute measurement timestamp in
        column 7 which resets the accumulator; every other row adds its
        elapsed seconds to the running base.
        """
        if test_name == "Test_Time" and len(cols) > 7:
            measurement_str = cols[7]
            try:
                dt = datetime.strptime(measurement_str, "%m/%d/%Y %I:%M:%S %p")
                dt = pytz.timezone('UTC').localize(dt)  # interpret as UTC
                base_accumulator = dt.timestamp()
                return base_accumulator, base_accumulator
            except ValueError:
                pass

        try:
            return base_accumulator + float(elapsed_time_str), base_accumulator
        except ValueError:
            return base_accumulator, base_accumulator
|||
|
|
|
|||
|
|
class ExcelReportWorker:
    """Excel report generation worker (builds one workbook per SN)."""

    @staticmethod
    def generate_single_report(report_data, output_dir):
        """Generate a single Excel report from one SN's aggregated data.

        Returns a result dict with "success" plus metadata; never raises —
        failures are reported through the dict's "error" field.
        """
        # Pre-seed metadata with safe defaults so the except handler below
        # can always build a result dict (bug fix: an early KeyError on
        # report_data["sn"] previously raised NameError inside the handler).
        sn = "UNKNOWN_SN"
        cell = "UNKNOWN_CELL"
        all_cells = "UNKNOWN_CELLlist"
        test_cycle_time = "UNKNOWN_TIME"
        try:
            sn = report_data["sn"]
            cell = report_data.get("cell", "UNKNOWN_CELL")
            all_cells = report_data.get("all_cells", "UNKNOWN_CELLlist")
            test_cycle_time = report_data.get("test_cycle_time", "UNKNOWN_TIME")
            data_info = report_data["data_info"]
            source_files_count = report_data["source_files_count"]

            # Normalize explicit None values coming from upstream.
            if sn is None:
                sn = "UNKNOWN_SN"
            if cell is None:
                cell = "UNKNOWN_CELL"
            if all_cells is None:
                all_cells = "UNKNOWN_CELLlist"
            if test_cycle_time is None:
                test_cycle_time = "UNKNOWN_TIME"

            # Embed the cell list in the file name.
            # NOTE(review): when all_cells is the fallback *string* rather
            # than a list, this joins its characters — confirm upstream
            # always passes a list.
            cell_list_display = ','.join(str(c) for c in all_cells)
            base_name = f"{sn}_C-{cell_list_display}_R-{source_files_count}"
            output_file = os.path.join(output_dir, f"{base_name}.xlsx")

            # Validate the payload shape before doing any work.
            if not data_info or 'headers' not in data_info or 'data' not in data_info:
                return {
                    "success": False,
                    "sn": sn,
                    "cell": cell,
                    "all_cells": all_cells,
                    "test_cycle_time": test_cycle_time,
                    "error": "数据格式无效或为空"
                }

            df_all = ExcelReportWorker._prepare_dataframe(data_info)

            if df_all.empty:
                return {
                    "success": False,
                    "sn": sn,
                    "cell": cell,
                    "all_cells": all_cells,
                    "test_cycle_time": test_cycle_time,
                    "error": "DataFrame为空,无数据可生成"
                }

            # Status column plus the FAIL count collected during parsing.
            status_col = ExcelReportWorker._detect_status_column(df_all)
            fail_count = data_info.get('report_stats', {}).get('fail_count', 0)

            # TestCycleTime statistics for the stats/detail sheets.
            time_stats = ExcelReportWorker._extract_time_statistics(df_all, data_info)

            if status_col and fail_count == 0:
                # Parsing reported zero FAILs — double-check in the frame.
                fail_mask = df_all[status_col].astype(str).str.strip().str.upper().str.contains('FAIL')
                fail_count = int(fail_mask.sum())
                df_fail = df_all[fail_mask]
            elif fail_count > 0:
                # Trust the parsed count; build the FAIL subset if possible.
                if status_col:
                    fail_mask = df_all[status_col].astype(str).str.strip().str.upper().str.contains('FAIL')
                    df_fail = df_all[fail_mask]
                else:
                    df_fail = pd.DataFrame(columns=df_all.columns)
            else:
                df_fail = pd.DataFrame(columns=df_all.columns)

            # Reflect the FAIL count in the file name (cell list retained).
            if fail_count > 0:
                new_name = f"{base_name}_Fitem-{fail_count}.xlsx"
                output_file = os.path.join(output_dir, new_name)

            report_stats = data_info.get('report_stats', {})
            source_files_count = report_stats.get('source_files_count', 0)
            cell_info = report_stats.get('cell_info', {})

            ExcelReportWorker._save_excel(df_all, df_fail, output_file, sn, cell, test_cycle_time,
                                          source_files_count, fail_count, cell_info, time_stats)

            return {
                "success": True,
                "sn": sn,
                "cell": cell,
                "all_cells": all_cells,
                "test_cycle_time": test_cycle_time,
                "output_file": output_file,
                "record_count": len(df_all),
                "source_files_count": source_files_count,
                "fail_count": fail_count
            }

        except Exception as e:
            return {
                "success": False,
                "sn": sn,
                "cell": cell,
                "all_cells": all_cells,
                "test_cycle_time": test_cycle_time,
                "error": f"{type(e).__name__}: {str(e)}"
            }

    @staticmethod
    def _extract_time_statistics(df_all, data_info):
        """Collect TestCycleTime statistics.

        Returns a dict with the distinct cycle times, per-time record
        counts, and (when the values parse) the overall time range.
        """
        time_stats = {
            'all_times': [],
            'time_count': 0,
            'time_distribution': {},
            'records_by_time': {},
            'time_range': {}
        }

        if 'TestCycleTime' in df_all.columns:
            time_values = df_all['TestCycleTime'].dropna().unique()
            time_stats['all_times'] = sorted(time_values)
            time_stats['time_count'] = len(time_values)

            # Record count per cycle time.
            time_stats['time_distribution'] = df_all['TestCycleTime'].value_counts().to_dict()

            for time_val in time_values:
                time_stats['records_by_time'][time_val] = int((df_all['TestCycleTime'] == time_val).sum())

            # Overall time range, when values parse as timestamps.
            datetime_objects = []
            for time_str in time_values:
                if time_str != "UNKNOWN_TIME":
                    try:
                        datetime_objects.append(datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S"))
                    except (ValueError, TypeError):
                        pass

            if datetime_objects:
                min_time = min(datetime_objects)
                max_time = max(datetime_objects)
                time_stats['time_range'] = {
                    'start': min_time.strftime("%Y-%m-%d %H:%M:%S"),
                    'end': max_time.strftime("%Y-%m-%d %H:%M:%S"),
                    'duration_hours': round((max_time - min_time).total_seconds() / 3600, 2)
                }

        # Merge any additional times recorded in report_stats.
        report_stats = data_info.get('report_stats', {})
        time_info = report_stats.get('time_info', {})
        if time_info.get('all_times'):
            combined = sorted(set(time_stats['all_times']).union(time_info.get('all_times', [])))
            time_stats['all_times'] = combined
            time_stats['time_count'] = len(combined)

        return time_stats

    @staticmethod
    def _detect_status_column(df):
        """Return the first column whose name looks like status/result."""
        for col in df.columns:
            col_str = str(col)
            if re.search(r'\b(status|result)\b', col_str, flags=re.I) or col_str.strip().lower() in (
                    'status', 'result', 'test status'):
                return col
        return None

    @staticmethod
    def _save_excel(df_all, df_fail, output_file, sn, cell, test_cycle_time, source_files_count, fail_count,
                    cell_info=None, time_stats=None):
        """Write the workbook: Report Stats, All Tests (chunked), FAIL list
        and TestCycleTime Details sheets.

        Raises RuntimeError (chained to the cause) on any write failure.
        """
        try:
            def _sanitize_df(df):
                # Clip over-long strings and blank out NaN in object columns.
                df = df.copy()
                obj_cols = df.select_dtypes(include=['object']).columns
                # Excel's hard per-cell limit is 32767 characters.
                for c in obj_cols:
                    df[c] = df[c].astype(str).str.slice(0, 32767)
                df[obj_cols] = df[obj_cols].fillna('')
                return df

            df_all = _sanitize_df(df_all)
            df_fail = _sanitize_df(df_fail) if df_fail is not None and not df_fail.empty else df_fail

            with pd.ExcelWriter(
                    output_file,
                    engine='xlsxwriter',
                    engine_kwargs={'options': {
                        'strings_to_urls': False,
                        'strings_to_formulas': False
                    }}
            ) as writer:
                # Report Stats sheet (includes TestCycleTime details).
                stats_data = ExcelReportWorker._prepare_stats_data(
                    sn, cell, test_cycle_time, source_files_count, fail_count,
                    cell_info, time_stats
                )
                pd.DataFrame(stats_data).to_excel(writer, sheet_name='Report Stats', index=False)

                # All Tests — split across sheets so we never exceed Excel's
                # row limit (one row reserved for the header).
                MAX_ROWS = 1_048_576
                total_rows = len(df_all)

                if total_rows == 0:
                    # Create an empty sheet so the workbook looks complete.
                    pd.DataFrame(columns=df_all.columns).to_excel(writer, sheet_name='All Tests', index=False)
                elif total_rows <= MAX_ROWS - 1:
                    df_all.to_excel(writer, sheet_name='All Tests', index=False)
                else:
                    for start in range(0, total_rows, MAX_ROWS - 1):
                        end = min(start + (MAX_ROWS - 1), total_rows)
                        sheet_name = f'All Tests_{start // (MAX_ROWS - 1) + 1}'
                        df_all.iloc[start:end].to_excel(writer, sheet_name=sheet_name, index=False)

                # FAIL list only when there is data to show.
                if fail_count > 0 and df_fail is not None and not df_fail.empty:
                    df_fail.to_excel(writer, sheet_name='FAIL list', index=False)

                # TestCycleTime Details sheet.
                if time_stats and time_stats['all_times']:
                    ExcelReportWorker._create_time_details_sheet(writer, time_stats)

                # Column widths only on the small stats sheet — formatting
                # huge sheets is too expensive.
                ws = writer.sheets.get('Report Stats')
                if ws is not None:
                    ws.set_column(0, 0, 25)  # item column
                    ws.set_column(1, 1, 40)  # value column (long time lists)

        except Exception as e:
            raise RuntimeError(f"Excel文件保存失败: {str(e)}") from e

    @staticmethod
    def _prepare_stats_data(sn, cell, test_cycle_time, source_files_count, fail_count,
                            cell_info, time_stats):
        """Build the two-column (item, value) table for the stats sheet."""
        stats_data = {
            '统计项': [],
            '值': []
        }

        # Base statistics.
        base_stats = {
            'SN号': sn,
            'Cell编号': cell,
            '主要测试周期时间': test_cycle_time,
            '来源HTML文件数': source_files_count,
            '总FAIL数量': fail_count,
            '生成时间': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        for key, value in base_stats.items():
            stats_data['统计项'].append(key)
            stats_data['值'].append(value)

        # TestCycleTime details.
        if time_stats:
            stats_data['统计项'].append('测试周期时间总数')
            stats_data['值'].append(time_stats.get('time_count', 0))

            # First 10 cycle times, elided when longer.
            all_times = time_stats.get('all_times', [])
            time_list_display = ', '.join(str(t) for t in all_times[:10])
            if len(all_times) > 10:
                time_list_display += f'...等{len(all_times)}个时间段'

            stats_data['统计项'].append('测试周期时间列表')
            stats_data['值'].append(time_list_display)

            # Overall range, when available.
            time_range = time_stats.get('time_range', {})
            if time_range:
                stats_data['统计项'].append('测试时间范围')
                stats_data['值'].append(f"{time_range.get('start', '')} 至 {time_range.get('end', '')}")

                stats_data['统计项'].append('测试持续时长(小时)')
                stats_data['值'].append(time_range.get('duration_hours', 0))

            # Top-5 record-count distribution.
            time_distribution = time_stats.get('time_distribution', {})
            if time_distribution:
                top_times = sorted(time_distribution.items(), key=lambda x: x[1], reverse=True)[:5]
                dist_display = ', '.join([f"{time}({count})" for time, count in top_times])
                if len(time_distribution) > 5:
                    dist_display += f'...等{len(time_distribution)}个分布'

                stats_data['统计项'].append('记录数时间分布(前5)')
                stats_data['值'].append(dist_display)

        # Cell details.
        if cell_info:
            all_cells = cell_info.get('all_cells', [])
            cell_count = cell_info.get('cell_count', 0)
            stats_data['统计项'].extend(['Cell数量', 'Cell列表'])

            cell_list_display = ','.join(str(c) for c in all_cells[:10])
            if len(all_cells) > 10:
                cell_list_display += f'...等{len(all_cells)}个'

            stats_data['值'].extend([cell_count, cell_list_display])

        return stats_data

    @staticmethod
    def _create_time_details_sheet(writer, time_stats):
        """Write the TestCycleTime Details sheet (time, count, percentage)."""
        time_details_data = []

        all_times = time_stats.get('all_times', [])
        time_distribution = time_stats.get('time_distribution', {})
        records_by_time = time_stats.get('records_by_time', {})

        total_records = sum(time_distribution.values())
        for time_val in all_times:
            record_count = records_by_time.get(time_val, time_distribution.get(time_val, 0))
            time_details_data.append({
                '测试周期时间': time_val,
                '记录数量': record_count,
                '占比(%)': round(record_count / total_records * 100, 2) if time_distribution else 0
            })

        if time_details_data:
            df_time_details = pd.DataFrame(time_details_data)
            df_time_details.to_excel(writer, sheet_name='TestCycleTime Details', index=False)

            # Column widths for readability.
            ws = writer.sheets.get('TestCycleTime Details')
            if ws is not None:
                ws.set_column(0, 0, 25)  # cycle-time column
                ws.set_column(1, 1, 15)  # record-count column
                ws.set_column(2, 2, 15)  # percentage column

    @staticmethod
    def _prepare_dataframe(data_info):
        """Build the DataFrame for the report, preserving header order.

        Bug fix: the old code unconditionally popped/re-inserted the column
        after 'Test Name' at the same index — a no-op that raised
        IndexError when 'Test Name' was the last column; it is now guarded.
        """
        if not data_info['data']:
            return pd.DataFrame()

        df = pd.DataFrame(data_info['data'], columns=data_info['headers'])
        cols = data_info['headers'].copy()
        test_name_idx = cols.index('Test Name') if 'Test Name' in cols else next(
            (i for i, h in enumerate(cols) if re.search(r'test\s*name', h, flags=re.I)), 0)
        if test_name_idx + 1 < len(cols):
            cols.insert(test_name_idx + 1, cols.pop(test_name_idx + 1))
        return df[cols]
|
|||
|
|
|
|||
|
|
class ParallelHTMLReportProcessor:
    """Parallel HTML report processor.

    Fans all HTML files under a source directory out over a
    ProcessPoolExecutor and aggregates per-SN data, FAIL counts, cell
    numbers and test-cycle times into in-memory maps.
    """

    def __init__(self):
        # sn -> {'headers': [...], 'data': [...]} accumulated parse results
        self.sn_data_map = {}
        # sn -> set of source file names that contributed rows
        self.sn_source_files = defaultdict(set)
        # sn -> total FAIL rows across that SN's files
        self.sn_fail_counts = defaultdict(int)
        # sn -> number of HTML files seen during the pre-scan
        self.sn_file_counts = defaultdict(int)
        self.sn_cell_info = defaultdict(set)  # cell numbers per SN
        self.sn_test_cycle_times = defaultdict(set)  # test-cycle times per SN

    def process_files(self, source_dir, max_workers=None):
        """Process every HTML file under *source_dir* in parallel.

        Returns self.sn_data_map (sn -> aggregated headers/data/stats).
        """
        all_files = self._scan_files(source_dir)

        if not all_files:
            print(f"{Fore.YELLOW}⚠ 未找到HTML文件")
            return self.sn_data_map

        # Pre-scan the SN distribution (from file names only).
        print(f"{Fore.YELLOW}⌛ 正在扫描文件分布...")
        self._collect_sn_distribution(all_files)

        # Show the distribution summary.
        self._display_file_distribution()

        # Default worker count: one per CPU, capped by the file count.
        if max_workers is None:
            max_workers = min(mp.cpu_count(), len(all_files))

        print(f"{Fore.CYAN}▶ 开始并行处理 {len(all_files)} 个文件 (使用 {max_workers} 个进程)")

        # Progress tracker shared by the collection loop below.
        progress_tracker = ThreadSafeProgressTracker(len(all_files))

        # Process pool fan-out.
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            # Submit every file as its own task.
            future_to_file = {executor.submit(HTMLFileProcessor.process_single_file, file_path): file_path
                              for file_path in all_files}

            # Collect results in submission order; .result() blocks until
            # each future completes.
            infor = ''
            for future in future_to_file:
                result = future.result()
                success = result["success"]

                if success:
                    self._store_result_data(result)

                if not success:
                    # Failure messages accumulate into the progress line.
                    infor = infor + f"{Fore.RED}⚠ 处理失败: {result['file']} - {result['error']}|"
                progress_tracker.update(success, infor)

        progress_tracker.finish(phase_name="HTML文件处理")
        self._add_report_statistics()

        return self.sn_data_map

    def _scan_files(self, source_dir):
        """Recursively collect all .html/.htm files under *source_dir*."""
        all_files = []
        for root_dir, _, files in os.walk(source_dir):
            all_files.extend(
                [os.path.join(root_dir, f) for f in files if f.lower().endswith(('.html', '.htm'))]
            )
        return all_files

    def _collect_sn_distribution(self, file_list):
        """Pre-scan SN/cell/cycle-time distribution from file names only."""
        for file_path in file_list:
            filename = os.path.basename(file_path)
            sn, cell = self._extract_sn_and_cell_from_filename(filename)
            test_cycle_time = HTMLFileProcessor._extract_test_cycle_time(filename)
            self.sn_file_counts[sn] += 1
            self.sn_cell_info[sn].add(cell)
            self.sn_test_cycle_times[sn].add(test_cycle_time)

    def _extract_sn_and_cell_from_filename(self, filename):
        """Extract (sn, cell) from a file name, with UNKNOWN_* fallbacks."""
        # SN: 'F' followed by 15 alphanumerics, optionally 5+ more.
        sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?'
        sn_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I)
        # Cell: trailing "-<digits>.html".
        cell_match = re.search(r'-(\d+)\.html$', filename)

        sn = sn_match.group(1) if sn_match else "UNKNOWN_SN"
        cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL"

        return sn, cell

    def _display_file_distribution(self):
        """Print the per-SN file distribution (with cells and cycle times)."""
        dist_info = []
        for sn, count in list(self.sn_file_counts.items())[:10]:
            # Cells seen for this SN (first 3 shown, elided when longer).
            cells = list(self.sn_cell_info.get(sn, set()))
            cell_display = ', '.join(sorted(cells)[:3]) if cells else "未知"
            if len(cells) > 3:
                cell_display += f"...等{len(cells)}个"

            # Cycle times for this SN (first 11 shown, elided when longer).
            times = list(self.sn_test_cycle_times.get(sn, set()))
            time_display = ', '.join(sorted(times)[:11]) if times else "未知"
            if len(times) > 11:
                time_display += f"...等{len(times)}个"

            dist_info.append(f"{sn[:31]:<32}: {count}个文件, Cells: {cell_display:>3}, 时间: {time_display}")

        if len(self.sn_file_counts) > 10:
            dist_info.append(f"... 还有 {len(self.sn_file_counts) - 10} 个SN")

        print(f"{Fore.MAGENTA}⚫ SN文件分布:\n{Fore.CYAN}{chr(10).join(dist_info)}")

    def _store_result_data(self, result):
        """Merge one successful file result into the per-SN aggregates."""
        sn = result["sn"]
        cell = result.get("cell", "UNKNOWN_CELL")
        test_cycle_time = result.get("test_cycle_time", "UNKNOWN_TIME")
        filename = result["filename"]

        # Track provenance plus the cell/time sets.
        self.sn_source_files[sn].add(filename)
        self.sn_cell_info[sn].add(cell)
        self.sn_test_cycle_times[sn].add(test_cycle_time)

        # The first file for an SN defines its header layout.
        if sn not in self.sn_data_map:
            self.sn_data_map[sn] = {'headers': result["headers"], 'data': []}

        self.sn_data_map[sn]['data'].extend(result["rows"])

        # Accumulate FAIL rows.
        self.sn_fail_counts[sn] += result["fail_count"]

    def _add_report_statistics(self):
        """Attach per-SN report_stats (sources, fails, cell and time info)."""
        for sn, data_info in self.sn_data_map.items():
            source_count = len(self.sn_source_files.get(sn, []))
            cell_set = self.sn_cell_info.get(sn, set())
            time_set = self.sn_test_cycle_times.get(sn, set())
            cell_list = list(cell_set)
            time_list = list(time_set)
            primary_cell = cell_list[0] if cell_list else "UNKNOWN_CELL"
            primary_time = time_list[0] if time_list else "UNKNOWN_TIME"

            data_info['report_stats'] = {
                'source_files_count': source_count,
                'fail_count': self.sn_fail_counts.get(sn, 0),
                'cell_info': {
                    'primary_cell': primary_cell,
                    'all_cells': cell_list,
                    'cell_count': len(cell_set)
                },
                'time_info': {
                    'primary_time': primary_time,
                    'all_times': time_list,
                    'time_count': len(time_set)
                }
            }
|
|
|||
|
|
|
|||
|
|
class ParallelExcelReportGenerator:
    """并行Excel报告生成器

    Fans per-SN data out to a process pool where
    ``ExcelReportWorker.generate_single_report`` renders one workbook per
    SN, with live progress reporting and a final success/failure summary.
    """

    def __init__(self, output_dir):
        # Directory all generated workbooks are written into.
        self.output_dir = output_dir

    def generate_reports(self, sn_data_map, max_workers=None):
        """并行生成Excel报告

        Args:
            sn_data_map: mapping of SN -> {'headers', 'data', 'report_stats', ...}.
            max_workers: number of worker processes; defaults to
                ``min(cpu_count, number of reports)``.

        Returns:
            Tuple ``(success_reports, failed_reports)`` of result dicts.
        """
        # Local import: the module-level import only binds ProcessPoolExecutor.
        from concurrent.futures import as_completed

        total_reports = len(sn_data_map)

        if total_reports == 0:
            print(f"{Fore.YELLOW}⚠ 没有数据可生成报告")
            return [], []

        if max_workers is None:
            max_workers = min(mp.cpu_count(), total_reports)

        print(f"{Fore.CYAN}▶ 开始并行生成Excel报告 (共{total_reports}个,使用 {max_workers} 个进程)")

        progress_tracker = ThreadSafeProgressTracker(total_reports)
        report_tasks = self._build_report_tasks(sn_data_map)

        success_reports = []
        failed_reports = []

        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            future_to_task = {
                executor.submit(ExcelReportWorker.generate_single_report, task, self.output_dir): task
                for task in report_tasks
            }

            # Consume futures as they complete so the progress display is
            # live, and guard result() so one crashed worker cannot raise
            # out of the loop and silently drop the remaining results.
            for future in as_completed(future_to_task):
                task = future_to_task[future]
                try:
                    result = future.result()
                except Exception as exc:
                    # Synthesize a failure record for an uncaught worker error.
                    result = {
                        "success": False,
                        "sn": task.get("sn", "UNKNOWN_SN"),
                        "cell": task.get("cell", "UNKNOWN_CELL"),
                        "test_cycle_time": task.get("test_cycle_time", "UNKNOWN_TIME"),
                        "error": f"{type(exc).__name__}: {exc}",
                    }

                if result["success"]:
                    success_reports.append(result)
                    progress_tracker.update(success=True, count=1)
                    self._show_success_info(result)
                else:
                    failed_reports.append(result)
                    progress_tracker.update(success=False, count=1)
                    self._show_error_info(result)

        progress_tracker.finish(phase_name="Excel报告生成")

        # Final summary (incl. cell and test-cycle-time info).
        self._show_final_stats(success_reports, failed_reports)

        return success_reports, failed_reports

    def _build_report_tasks(self, sn_data_map):
        """Build one worker task dict per SN from the accumulated data map."""
        report_tasks = []
        for sn, data_info in sn_data_map.items():
            stats = data_info.get('report_stats', {})
            cell_info = stats.get('cell_info', {})
            time_info = stats.get('time_info', {})

            report_tasks.append({
                "sn": sn,
                "cell": cell_info.get('primary_cell', 'UNKNOWN_CELL'),
                # BUG FIX: the fallback must be a list — the previous string
                # default ('UNKNOWN_CELLlist') was later joined character by
                # character in _show_success_info.
                "all_cells": cell_info.get('all_cells', ['UNKNOWN_CELL']),
                "test_cycle_time": time_info.get('primary_time', 'UNKNOWN_TIME'),
                "data_info": data_info,
                # BUG FIX: the fallback must be 0 (was {}) — this value is
                # summed numerically in _show_final_stats.
                "source_files_count": stats.get('source_files_count', 0),
            })
        return report_tasks

    def _show_success_info(self, result):
        """安全显示单个成功报告信息(包含cell和测试周期时间信息)"""
        try:
            # Truncate long SNs for the fixed-width console line.
            sn_display = str(result.get('sn', 'UNKNOWN_SN'))[:32]
            if result.get('sn') and len(str(result['sn'])) > 32:
                sn_display += "..."

            # BUG FIX: the fallback must be a list — a bare string default
            # would be joined character by character.
            cell_display = ','.join(
                str(c) for c in result.get('all_cells', ['UNKNOWN_CELL']))

            time_display = str(result.get('test_cycle_time', 'UNKNOWN_TIME'))[:20]

            output_file = result.get('output_file', '')
            file_name = os.path.basename(output_file) if output_file else '未知文件'

            success_info = [
                f"{Fore.GREEN}✓ 生成成功",
                f"SN: {sn_display:<32}",
                f"Cell: {cell_display:<8}",
                f"时间: {time_display}",
                f"文件: {file_name:<60}",
                f"记录数: {result.get('record_count', 0):>5}",
                f"来源文件: {result.get('source_files_count', 0):>2}",
                f"FAIL数量: {result.get('fail_count', 0):>3}"
            ]
            # \x1b[2K\r clears the in-place progress line before printing.
            print('\x1b[2K\r' + ' | '.join(success_info).ljust(120))

        except Exception:
            # Formatting itself failed — fall back to a minimal line.
            print(
                f"{Fore.GREEN}✓ 报告生成成功 (SN: {result.get('sn', 'UNKNOWN_SN')}, Cell: {result.get('cell', 'UNKNOWN_CELL')}, Time: {result.get('test_cycle_time', 'UNKNOWN_TIME')})")

    def _show_error_info(self, result):
        """安全显示单个失败报告信息(包含cell和测试周期时间信息)"""
        try:
            sn_display = str(result.get('sn', 'UNKNOWN_SN'))[:32]
            if result.get('sn') and len(str(result['sn'])) > 32:
                sn_display += "..."

            cell_display = str(result.get('cell', 'UNKNOWN_CELL'))
            time_display = str(result.get('test_cycle_time', 'UNKNOWN_TIME'))[:20]

            # Truncate long error messages to keep the line readable.
            error_msg = str(result.get('error', '未知错误'))[:50]
            if len(str(result.get('error', ''))) > 50:
                error_msg += "..."

            error_info = [
                f"{Fore.RED}✗ 生成失败",
                f"SN: {sn_display}",
                f"Cell: {cell_display}",
                f"时间: {time_display}",
                f"错误: {error_msg}"
            ]
            # \x1b[2K\r clears the in-place progress line before printing.
            print('\x1b[2K\r' + ' | '.join(error_info).ljust(100))

        except Exception:
            # Formatting itself failed — fall back to a minimal line.
            print(
                f"{Fore.RED}✗ 报告生成失败 (SN: {result.get('sn', 'UNKNOWN_SN')}, Cell: {result.get('cell', 'UNKNOWN_CELL')}, Time: {result.get('test_cycle_time', 'UNKNOWN_TIME')})")

    def _show_final_stats(self, success_reports, failed_reports):
        """显示最终统计信息(包含cell和测试周期时间信息)"""
        try:
            total_records = sum(r.get('record_count', 0) for r in success_reports)
            total_sources = sum(r.get('source_files_count', 0) for r in success_reports)
            total_fails = sum(r.get('fail_count', 0) for r in success_reports)

            # Distinct cells / cycle times seen across all reports (both outcomes).
            unique_cells = set(
                r.get('cell', 'UNKNOWN_CELL') for r in success_reports + failed_reports)
            unique_times = set(
                r.get('test_cycle_time', 'UNKNOWN_TIME') for r in success_reports + failed_reports)

            print(f"\n{Fore.CYAN}=== 最终统计 ===")
            print(f"{Fore.GREEN}成功生成报告: {len(success_reports)} 个")
            print(f"{Fore.RED}失败报告: {len(failed_reports)} 个")
            print(f"{Fore.BLUE}总记录数: {total_records}")
            print(f"{Fore.BLUE}总来源文件: {total_sources}")
            print(f"{Fore.BLUE}总FAIL数量: {total_fails}")
            print(f"{Fore.BLUE}涉及Cell数量: {len(unique_cells)}")
            print(f"{Fore.BLUE}涉及测试周期时间数量: {len(unique_times)}")
            print(f"{Fore.CYAN}输出目录: {self.output_dir}")

            if failed_reports:
                print(f"\n{Fore.YELLOW}失败报告详情:")
                for report in failed_reports:
                    sn = report.get('sn', 'UNKNOWN_SN')
                    cell = report.get('cell', 'UNKNOWN_CELL')
                    cycle_time = report.get('test_cycle_time', 'UNKNOWN_TIME')
                    error = report.get('error', '未知错误')
                    print(f"  {sn} (Cell {cell}, Time {cycle_time}): {error}")

        except Exception as e:
            print(f"{Fore.RED}⚠ 统计信息显示出错: {e}")
class ReportProcessor:
    """主报告处理器

    Orchestrates the full pipeline: prompt for a source directory, parse
    the HTML files in parallel, then generate the Excel reports in parallel.
    """

    def __init__(self):
        pass

    def process_reports(self, html_max_workers=None, excel_max_workers=None):
        """处理完整流程"""
        source_dir = self._get_directory_from_console()
        if not source_dir:
            print(f"{Fore.RED}❌ 未选择目录,程序退出")
            return

        output_dir = self._create_output_dir(source_dir)

        try:
            # Phase 1: parse the HTML files in parallel.
            print(f"\n{Fore.CYAN}=== 阶段1: HTML文件处理 ===")
            html_stage = ParallelHTMLReportProcessor()
            processed_data = html_stage.process_files(source_dir, html_max_workers)

            if not processed_data:
                print(f"{Fore.YELLOW}⚠ 没有处理任何数据,程序结束")
                return

            # Phase 2: render the Excel reports in parallel.
            print(f"\n{Fore.CYAN}=== 阶段2: Excel报告生成 ===")
            excel_stage = ParallelExcelReportGenerator(output_dir)
            ok_reports, bad_reports = excel_stage.generate_reports(
                processed_data, excel_max_workers)

            # Summarize the whole run.
            self._show_overall_result(len(processed_data), ok_reports, bad_reports)

        except Exception as e:
            print(f"\n{Fore.RED}❌ 程序执行出错: {type(e).__name__}: {str(e)}")
            import traceback
            traceback.print_exc()

    def _get_directory_from_console(self):
        """从控制台获取目录路径"""
        while True:
            print(f"\n{Fore.CYAN}=== 并行HTML报告处理程序 ===")
            print(f"{Fore.WHITE}请输入包含HTML文件的目录路径:")
            entered = input("> ").strip()

            if not entered:
                print(f"{Fore.YELLOW}⚠ 路径不能为空,请重新输入")
                continue

            # Drop surrounding quotes pasted from a file manager.
            candidate = entered.strip('"\'')

            if not os.path.exists(candidate):
                print(f"{Fore.RED}❌ 路径不存在,请重新输入")
            elif not os.path.isdir(candidate):
                print(f"{Fore.RED}❌ 请输入目录路径,而不是文件路径")
            else:
                return candidate

    def _create_output_dir(self, source_dir):
        """创建输出目录"""
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        output_dir = os.path.join(source_dir, f"Html文件分析_带Cell编号_{stamp}")
        os.makedirs(output_dir, exist_ok=True)
        print(f"{Fore.GREEN}✔ 输出目录创建成功: {output_dir}")
        return output_dir

    def _show_overall_result(self, total_sn, success_reports, failed_reports):
        """安全显示总体结果(包含cell和测试周期时间信息)"""
        failed_total = len(failed_reports)

        print(f"\n{Fore.CYAN}=== 程序执行完成 ===")
        print(f"{Fore.GREEN}✓ 处理完成!")
        print(f"{Fore.BLUE}总SN数量: {total_sn}")
        print(f"{Fore.GREEN}成功报告: {len(success_reports)}")
        print(f"{Fore.RED}失败报告: {failed_total}")

        if failed_total == 0:
            print(f"{Fore.GREEN}🎉 所有报告生成成功!")
        else:
            print(f"{Fore.YELLOW}⚠ 有 {failed_total} 个报告生成失败,请查看上述错误信息")
if __name__ == "__main__":
    try:
        runner = ReportProcessor()

        # Worker counts for the two phases may be set independently;
        # None lets each phase pick its own default.
        runner.process_reports(
            html_max_workers=None,   # HTML parsing processes
            excel_max_workers=None,  # Excel generation processes
        )

    except KeyboardInterrupt:
        print(f"\n{Fore.YELLOW}⚠ 用户中断程序")
    except Exception as e:
        print(f"\n{Fore.RED}❌ 程序执行出错: {type(e).__name__}: {str(e)}")
        import traceback
        traceback.print_exc()