From efddb997e256ae65d84ad8e8017a571c68a4f9a5 Mon Sep 17 00:00:00 2001 From: panxiang <1275280643@qq.com> Date: Sat, 4 Apr 2026 21:40:28 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20=20html=E7=9A=84=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E4=B8=AD=20=E5=AF=B9=E4=BA=8E=20cell=20=E7=9A=84?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=EF=BC=8Cexcel=E7=9A=84=E5=A4=84=E7=90=86?= =?UTF-8?q?=E4=B8=AD=E5=AF=B9=E4=BA=8Ecell=20=E7=9A=84=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E3=80=82=E4=BB=A5=E5=8F=8A=E6=B8=A9=E5=BA=A6=E5=A4=84=E7=90=86?= =?UTF-8?q?=E4=B8=AD=20=E5=AF=B9=E4=BA=8E=20=E6=95=B0=E6=8D=AE=E7=9A=84?= =?UTF-8?q?=E5=A4=84=E7=90=86=E5=A2=9E=E5=8A=A0=20=E6=97=B6=E9=97=B4?= =?UTF-8?q?=E6=88=B3=E7=9A=84=E6=8D=A2=E7=AE=97=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../htmlReportProcess_cmd_pV1.py | 112 +++++++++++++++--- .../htmlReportProcess_picHtml_2kV1.py | 2 +- tempReportProcess/tempReportProcess_V1.py | 43 ++++++- 3 files changed, 133 insertions(+), 24 deletions(-) diff --git a/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py b/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py index 9cd7e20..38d9b0c 100644 --- a/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py +++ b/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py @@ -50,6 +50,17 @@ class LogManager: def log_statistics(self, statistics_data): """记录统计信息到日志文件""" + + def mixed_sort_key(item): + """处理混合类型(数字字符串和普通字符串)的排序键""" + try: + if isinstance(item, str) and item.isdigit(): + return (0, int(item)) + else: + return (1, str(item)) + except (ValueError, TypeError): + return (2, str(item)) + with open(self.log_file, 'a', encoding='utf-8') as f: f.write("\n" + "=" * 80 + "\n") f.write("处理统计汇总\n") @@ -73,7 +84,9 @@ class LogManager: f.write(f"{"="*30}\n") # 按Cell编号排序 - sorted_cells = sorted(cell_stats.keys(), key=lambda x: int(x) if x.isdigit() else x) + # sorted_cells = sorted(cell_stats.keys(), key=lambda x: int(x) if x.isdigit() else x) + # 使用修复后的排序方法 + sorted_cells = sorted(cell_stats.keys(), key=mixed_sort_key) for cell in sorted_cells: stats = cell_stats[cell] @@ -92,7 +105,7 @@ class LogManager: sorted_fail_items = sorted(fail_details.items(), key=lambda x: x[1], reverse=True)[:10] for test_name, count in sorted_fail_items: fail_items.append(f"{test_name}({count}次)") - if len (sorted_fail_items)>10: + if len (fail_details.items())>10: lastitems = sorted(fail_details.items(), key=lambda x: x[1], reverse=True)[10:] messageappend ='' for test_name, count in lastitems: @@ -123,7 +136,10 @@ class LogManager: fail_details = stats.get('fail_details', {}) # Cell列表(逗号分隔) - cell_list = ','.join(sorted(cells, key=lambda x: int(x) if x.isdigit() else x)) + # cell_list = ','.join(sorted(cells, key=lambda x: int(x) if x.isdigit() else x)) + # 使用修复后的排序方法 + cell_list = ','.join(sorted(cells, key=mixed_sort_key)) + # 失败项详情 fail_items = [] @@ -292,31 +308,82 @@ class HTMLFileProcessor: @staticmethod def _extract_sn_and_cell(soup, filename): - """提取SN号和cell编号""" + """提取SN号和cell编号 - 支持多种来源""" try: sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?' + cell_regex = r'\b\d+\b' # 匹配纯数字的cell编号 - # 提取SN + # 初始化默认值 sn = "UNKNOWN_SN" + cell = "UNKNOWN_CELL" + + # 优先从HTML内容中提取 if soup is not None: + html_text = soup.get_text(" ", strip=True) + + # 方法1: 从Serial Number标签提取SN sn_tag = soup.find('h3', string=re.compile(r'Serial Number:', re.I)) if sn_tag: content_match = re.search(rf'\b({sn_regex})\b', sn_tag.get_text(), flags=re.I) if content_match: sn = content_match.group(1) - else: - html_text = soup.get_text(" ", strip=True) + + # 方法2: 从Test Cell标签提取Cell编号 + cell_tag = soup.find('h3', string=re.compile(r'Test Cell:', re.I)) + if cell_tag: + cell_content = cell_tag.get_text() + cell_match = re.search(rf'\b({cell_regex})\b', cell_content) + if cell_match: + cell = cell_match.group(1) + + # 方法3: 从整个HTML文本中搜索SN(如果前面没找到) + if sn == "UNKNOWN_SN": content_match = re.search(rf'\b({sn_regex})\b', html_text, flags=re.I) if content_match: sn = content_match.group(1) - else: + + # 方法4: 从整个HTML文本中搜索Cell编号(如果前面没找到) + if cell == "UNKNOWN_CELL": + # 搜索包含"cell"或"CELL"的文本段,然后提取数字 + cell_sections = re.findall(r'(?i)(?:cell|test cell)[^\d]*(\d+)', html_text) + if cell_sections: + cell = cell_sections[0] # 取第一个匹配的cell编号 + + # 如果HTML中没找到,从文件名中提取 + if sn == "UNKNOWN_SN": content_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I) if content_match: sn = content_match.group(1) - # 提取cell编号(从文件名末尾的"数字") - cell_match = re.search(r'-(\d+)\.html$', filename) - cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL" + if cell == "UNKNOWN_CELL": + # 从文件名中提取cell编号(多种模式) + # 模式1: 破折号后、下划线或点号前的数字 + pattern1 = r'-(\d+)[._]' + # 模式2: 包含"cell"或"CELL"后跟数字 + pattern2 = r'(?i)(?:cell|CELL)[^\d]*(\d+)' + # 模式3: 文件名末尾的数字 + pattern3 = r'(\d+)\.html?$' + + for pattern in [pattern1, pattern2, pattern3]: + cell_match = re.search(pattern, filename) + if cell_match: + cell = cell_match.group(1) + break + + # 额外的Cell编号提取逻辑(如果上述方法都没找到) + if cell == "UNKNOWN_CELL" and soup is not None: + # 尝试从表格或其他结构中提取Cell编号 + try: + # 查找包含"Cell"的表格单元格 + cell_tds = soup.find_all('td', string=re.compile(r'(?i)cell')) + for td in cell_tds: + parent_text = td.parent.get_text() + cell_match = re.search(rf'\b({cell_regex})\b', parent_text) + if cell_match: + cell = cell_match.group(1) + break + except: + pass return sn, cell @@ -1147,7 +1214,10 @@ class ParallelHTMLReportProcessor: """从文件名提取SN和cell编号""" sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?' sn_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I) - cell_match = re.search(r'-(\d+)\.html$', filename) + # 匹配模式:破折号后、下划线或点号前的数字 + pattern = r'-(\d+)[._]' + # cell_match = re.search(r'-(\d+)\.html$', filename) + cell_match = re.search(pattern, filename) sn = sn_match.group(1) if sn_match else "UNKNOWN_SN" cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL" @@ -1572,11 +1642,15 @@ if __name__ == "__main__": # 关键:防止打包环境下的重复执行 mp.freeze_support() - # 额外保护:确保只在主进程中执行 - if mp.current_process().name == 'MainProcess': - main() - input(f"输入任意结束程序......") - else: - # 子进程不需要执行任何交互代码 - pass + try: + # 额外保护:确保只在主进程中执行 + if mp.current_process().name == 'MainProcess': + main() + input(f"输入任意结束程序......") + else: + # 子进程不需要执行任何交互代码 + pass + + except KeyboardInterrupt: + print(f"用户中断程序") diff --git a/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py b/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py index 2b4529a..bd36440 100644 --- a/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py +++ b/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py @@ -977,7 +977,7 @@ class MultiFileTestReportScatterPlotter: print(f"警告:发现 {failed_conversions} 个 'Cell' 值无法转换为数字,这些条目将保留为字符串或根据业务逻辑处理。") # 业务决策:对于无法转换的,可以保留原字符串,或使用一个默认值 # 例如,将无法转换的条目其数值编号设为-1或一个特定的标识值 - # test_data.loc[test_data['Cell编号_数值'].isna(), 'Cell编号_数值'] = -1 + test_data.loc[test_data['Cell编号_数值'].isna(), 'Cell编号_数值'] = -1 # 此时,您可以根据需求选择使用 'Cell编号'(字符串)或 'Cell编号_数值'(数字)进行后续分组和可视化 # 对于绘图着色和排序,使用 'Cell编号_数值' 列 diff --git a/tempReportProcess/tempReportProcess_V1.py b/tempReportProcess/tempReportProcess_V1.py index dde4e13..de972bb 100644 --- a/tempReportProcess/tempReportProcess_V1.py +++ b/tempReportProcess/tempReportProcess_V1.py @@ -27,10 +27,10 @@ class TemperatureDataAnalyzer: # 常见中文字体候选(跨平台) candidates = [ "Microsoft YaHei", "Microsoft YaHei UI", # Windows - "SimHei", "SimSun", # Windows(黑体/宋体) - "PingFang SC", "Heiti SC", # macOS + "SimHei", "SimSun", # Windows(黑体/宋体) + "PingFang SC", "Heiti SC", # macOS "Noto Sans CJK SC", "Source Han Sans SC", "WenQuanYi Micro Hei", # Linux - "Arial Unicode MS" # 覆盖广的 Unicode 字体 + "Arial Unicode MS" # 覆盖广的 Unicode 字体 ] available = {f.name for f in font_manager.fontManager.ttflist} for name in candidates: @@ -60,7 +60,7 @@ class TemperatureDataAnalyzer: return True def load_and_process_data(self): - """加载和处理数据""" + """加载和处理数据,并保存带时间戳的新文件""" try: # 读取CSV文件,无表头 self.data = pd.read_csv(self.file_path, header=None) @@ -71,6 +71,19 @@ class TemperatureDataAnalyzer: # 转换时间戳格式(文本例如:10/29/2025 2:20:41 PM) self.data['datetime'] = pd.to_datetime(self.data['timestamp'], format='%m/%d/%Y %I:%M:%S %p') + # 将转换后的datetime对象存储到D列(原数据只有3列,所以新增第4列) + self.data['converted_timestamp'] = self.data['datetime'] + + + # 新增第5列:存储转换后的时间戳( UTC的时间戳,精确到ms) + # self.data['utc_timestamp_ms'] = (self.data['datetime'].astype('int64') // 10**6) + self.data['utc_timestamp_ms'] = (self.data['datetime'].astype('int64') // 10**9) + + + + # 保存带时间戳的新CSV文件 + self._save_csv_with_timestamp() + # 提取处理后的数据 self.timestamps = self.data['datetime'] self.temperatures = self.data['temperature'] @@ -83,6 +96,28 @@ class TemperatureDataAnalyzer: print(f"数据处理错误: {e}") return False + def _save_csv_with_timestamp(self): + """保存带时间戳的新CSV文件""" + try: + # 生成新文件名(原文件名+时间戳) + base_filename = os.path.splitext(os.path.basename(self.file_path))[0] + timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S") + output_filename = f"{base_filename}_with_timestamp_{timestamp_str}.csv" + output_dir = os.path.dirname(self.file_path) + output_path = os.path.join(output_dir, output_filename) + + # # 选择需要保存的列:原始三列 + 转换后的时间戳列 + # columns_to_save = ['timestamp', 'temperature', 'status', 'converted_timestamp'] + # 选择需要保存的列:原始三列 + 转换后的时间戳列 + 格式化时间戳列 + columns_to_save = ['timestamp', 'temperature', 'status', 'converted_timestamp', 'utc_timestamp_ms'] + + self.data[columns_to_save].to_csv(output_path, index=False, header=False) + + print(f"已保存带时间戳的新CSV文件: {output_path}") + + except Exception as e: + print(f"保存带时间戳CSV文件时出错: {e}") + def create_scatter_plots(self): """创建散点图""" fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))