From efddb997e256ae65d84ad8e8017a571c68a4f9a5 Mon Sep 17 00:00:00 2001
From: panxiang <1275280643@qq.com>
Date: Sat, 4 Apr 2026 21:40:28 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20=20html=E7=9A=84=E8=A7=A3?=
 =?UTF-8?q?=E6=9E=90=E4=B8=AD=20=E5=AF=B9=E4=BA=8E=20cell=20=E7=9A=84?=
 =?UTF-8?q?=E8=A7=A3=E6=9E=90=EF=BC=8Cexcel=E7=9A=84=E5=A4=84=E7=90=86?=
 =?UTF-8?q?=E4=B8=AD=E5=AF=B9=E4=BA=8Ecell=20=E7=9A=84=E8=A7=A3=E6=9E=90?=
 =?UTF-8?q?=E3=80=82=E4=BB=A5=E5=8F=8A=E6=B8=A9=E5=BA=A6=E5=A4=84=E7=90=86?=
 =?UTF-8?q?=E4=B8=AD=20=E5=AF=B9=E4=BA=8E=20=E6=95=B0=E6=8D=AE=E7=9A=84?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E5=A2=9E=E5=8A=A0=20=E6=97=B6=E9=97=B4?=
 =?UTF-8?q?=E6=88=B3=E7=9A=84=E6=8D=A2=E7=AE=97=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../htmlReportProcess_cmd_pV1.py              | 112 +++++++++++++++---
 .../htmlReportProcess_picHtml_2kV1.py         |   2 +-
 tempReportProcess/tempReportProcess_V1.py     |  43 ++++++-
 3 files changed, 133 insertions(+), 24 deletions(-)

diff --git a/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py b/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py
index 9cd7e20..38d9b0c 100644
--- a/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py
+++ b/htmlProcess/htmlReportProcess_cmd_p/htmlReportProcess_cmd_pV1.py
@@ -50,6 +50,17 @@ class LogManager:
 
     def log_statistics(self, statistics_data):
         """记录统计信息到日志文件"""
+
+        def mixed_sort_key(item):
+            """Sort key: digit-only strings first (by value), other values after."""
+            digit_string = isinstance(item, str) and item.isdigit()
+            try:
+                # int() can still reject exotic digits such as superscripts.
+                return (0, int(item)) if digit_string else (1, str(item))
+            except (ValueError, TypeError):
+                # Fallback bucket for values that looked numeric but are not.
+                return (2, str(item))
+
         with open(self.log_file, 'a', encoding='utf-8') as f:
             f.write("\n" + "=" * 80 + "\n")
             f.write("处理统计汇总\n")
@@ -73,7 +84,9 @@ class LogManager:
                 f.write(f"{"="*30}\n")
 
                 # 按Cell编号排序
-                sorted_cells = sorted(cell_stats.keys(), key=lambda x: int(x) if x.isdigit() else x)
+                # Sort with mixed_sort_key: the previous int-or-str lambda key raised
+                # TypeError whenever numeric and non-numeric cell names were mixed.
+                sorted_cells = sorted(cell_stats.keys(), key=mixed_sort_key)
 
                 for cell in sorted_cells:
                     stats = cell_stats[cell]
@@ -92,7 +105,7 @@ class LogManager:
                         sorted_fail_items = sorted(fail_details.items(), key=lambda x: x[1], reverse=True)[:10]
                         for test_name, count in sorted_fail_items:
                             fail_items.append(f"{test_name}({count}次)")
-                        if len (sorted_fail_items)>10:
+                        if len (fail_details.items())>10:
                             lastitems = sorted(fail_details.items(), key=lambda x: x[1], reverse=True)[10:]
                             messageappend =''
                             for test_name, count in lastitems:
@@ -123,7 +136,10 @@ class LogManager:
                     fail_details = stats.get('fail_details', {})
 
                     # Cell列表（逗号分隔）
-                    cell_list = ','.join(sorted(cells, key=lambda x: int(x) if x.isdigit() else x))
+                    # Sort with mixed_sort_key so numeric and non-numeric cell names
+                    # can be ordered together (int-vs-str keys are not comparable).
+                    cell_list = ','.join(sorted(cells, key=mixed_sort_key))
+
 
                     # 失败项详情
                     fail_items = []
@@ -292,31 +308,82 @@ class HTMLFileProcessor:
 
     @staticmethod
     def _extract_sn_and_cell(soup, filename):
-        """提取SN号和cell编号"""
+        """提取SN号和cell编号 - 支持多种来源"""
         try:
             sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?'
+            cell_regex = r'\b\d+\b'  # 匹配纯数字的cell编号
 
-            # 提取SN
+            # 初始化默认值
             sn = "UNKNOWN_SN"
+            cell = "UNKNOWN_CELL"
+
+            # 优先从HTML内容中提取
             if soup is not None:
+                html_text = soup.get_text(" ", strip=True)
+
+                # 方法1: 从Serial Number标签提取SN
                 sn_tag = soup.find('h3', string=re.compile(r'Serial Number:', re.I))
                 if sn_tag:
                     content_match = re.search(rf'\b({sn_regex})\b', sn_tag.get_text(), flags=re.I)
                     if content_match:
                         sn = content_match.group(1)
-                else:
-                    html_text = soup.get_text(" ", strip=True)
+
+                # 方法2: 从Test Cell标签提取Cell编号
+                cell_tag = soup.find('h3', string=re.compile(r'Test Cell:', re.I))
+                if cell_tag:
+                    cell_content = cell_tag.get_text()
+                    cell_match = re.search(rf'\b({cell_regex})\b', cell_content)
+                    if cell_match:
+                        cell = cell_match.group(1)
+
+                # 方法3: 从整个HTML文本中搜索SN（如果前面没找到）
+                if sn == "UNKNOWN_SN":
                     content_match = re.search(rf'\b({sn_regex})\b', html_text, flags=re.I)
                     if content_match:
                         sn = content_match.group(1)
-            else:
+
+                # 方法4: 从整个HTML文本中搜索Cell编号（如果前面没找到）
+                if cell == "UNKNOWN_CELL":
+                    # 搜索包含"cell"或"CELL"的文本段，然后提取数字
+                    cell_sections = re.findall(r'(?i)(?:cell|test cell)[^\d]*(\d+)', html_text)
+                    if cell_sections:
+                        cell = cell_sections[0]  # 取第一个匹配的cell编号
+
+            # 如果HTML中没找到，从文件名中提取
+            if sn == "UNKNOWN_SN":
                 content_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I)
                 if content_match:
                     sn = content_match.group(1)
 
-            # 提取cell编号（从文件名末尾的"数字"）
-            cell_match = re.search(r'-(\d+)\.html$', filename)
-            cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL"
+            if cell == "UNKNOWN_CELL":
+                # 从文件名中提取cell编号（多种模式）
+                # 模式1: 破折号后、下划线或点号前的数字
+                pattern1 = r'-(\d+)[._]'
+                # 模式2: 包含"cell"或"CELL"后跟数字
+                pattern2 = r'(?i)(?:cell|CELL)[^\d]*(\d+)'
+                # 模式3: 文件名末尾的数字
+                pattern3 = r'(\d+)\.html?$'
+
+                for pattern in [pattern1, pattern2, pattern3]:
+                    cell_match = re.search(pattern, filename)
+                    if cell_match:
+                        cell = cell_match.group(1)
+                        break
+
+            # 额外的Cell编号提取逻辑（如果上述方法都没找到）
+            if cell == "UNKNOWN_CELL" and soup is not None:
+                # 尝试从表格或其他结构中提取Cell编号
+                try:
+                    # 查找包含"Cell"的表格单元格
+                    cell_tds = soup.find_all('td', string=re.compile(r'(?i)cell'))
+                    for td in cell_tds:
+                        parent_text = td.parent.get_text()
+                        cell_match = re.search(rf'\b({cell_regex})\b', parent_text)
+                        if cell_match:
+                            cell = cell_match.group(1)
+                            break
+                except:
+                    pass
 
             return sn, cell
 
@@ -1147,7 +1214,10 @@ class ParallelHTMLReportProcessor:
         """从文件名提取SN和cell编号"""
         sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?'
         sn_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I)
-        cell_match = re.search(r'-(\d+)\.html$', filename)
+        # 匹配模式：破折号后、下划线或点号前的数字
+        pattern = r'-(\d+)[._]'
+        # cell_match = re.search(r'-(\d+)\.html$', filename)
+        cell_match = re.search(pattern, filename)
 
         sn = sn_match.group(1) if sn_match else "UNKNOWN_SN"
         cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL"
@@ -1572,11 +1642,15 @@ if __name__ == "__main__":
     # 关键：防止打包环境下的重复执行
     mp.freeze_support()
 
-    # 额外保护：确保只在主进程中执行
-    if mp.current_process().name == 'MainProcess':
-        main()
-        input(f"输入任意结束程序......")
-    else:
-        # 子进程不需要执行任何交互代码
-        pass
+    try:
+        # 额外保护：确保只在主进程中执行
+        if mp.current_process().name == 'MainProcess':
+            main()
+            input(f"输入任意结束程序......")
+        else:
+            # 子进程不需要执行任何交互代码
+            pass
+
+    except KeyboardInterrupt:
+        print(f"用户中断程序")
 
diff --git a/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py b/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py
index 2b4529a..bd36440 100644
--- a/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py
+++ b/htmlProcess/htmlReportProcess_picHtml/htmlReportProcess_picHtml_2kV1.py
@@ -977,7 +977,7 @@ class MultiFileTestReportScatterPlotter:
                 print(f"警告：发现 {failed_conversions} 个 'Cell' 值无法转换为数字，这些条目将保留为字符串或根据业务逻辑处理。")
                 # 业务决策：对于无法转换的，可以保留原字符串，或使用一个默认值
 		        # 例如，将无法转换的条目其数值编号设为-1或一个特定的标识值
-		        # test_data.loc[test_data['Cell编号_数值'].isna(), 'Cell编号_数值'] = -1
+                test_data.loc[test_data['Cell编号_数值'].isna(), 'Cell编号_数值'] = -1
 
             # 此时，您可以根据需求选择使用 'Cell编号'（字符串）或 'Cell编号_数值'（数字）进行后续分组和可视化
             # 对于绘图着色和排序，使用 'Cell编号_数值' 列
diff --git a/tempReportProcess/tempReportProcess_V1.py b/tempReportProcess/tempReportProcess_V1.py
index dde4e13..de972bb 100644
--- a/tempReportProcess/tempReportProcess_V1.py
+++ b/tempReportProcess/tempReportProcess_V1.py
@@ -27,10 +27,10 @@ class TemperatureDataAnalyzer:
             # 常见中文字体候选（跨平台）
             candidates = [
                 "Microsoft YaHei", "Microsoft YaHei UI",  # Windows
-                "SimHei", "SimSun",                      # Windows（黑体/宋体）
-                "PingFang SC", "Heiti SC",               # macOS
+                "SimHei", "SimSun",  # Windows（黑体/宋体）
+                "PingFang SC", "Heiti SC",  # macOS
                 "Noto Sans CJK SC", "Source Han Sans SC", "WenQuanYi Micro Hei",  # Linux
-                "Arial Unicode MS"                       # 覆盖广的 Unicode 字体
+                "Arial Unicode MS"  # 覆盖广的 Unicode 字体
             ]
             available = {f.name for f in font_manager.fontManager.ttflist}
             for name in candidates:
@@ -60,7 +60,7 @@ class TemperatureDataAnalyzer:
         return True
 
     def load_and_process_data(self):
-        """加载和处理数据"""
+        """加载和处理数据，并保存带时间戳的新文件"""
         try:
             # 读取CSV文件，无表头
             self.data = pd.read_csv(self.file_path, header=None)
@@ -71,6 +71,19 @@ class TemperatureDataAnalyzer:
             # 转换时间戳格式（文本例如：10/29/2025 2:20:41 PM）
             self.data['datetime'] = pd.to_datetime(self.data['timestamp'], format='%m/%d/%Y %I:%M:%S %p')
 
+            # 将转换后的datetime对象存储到D列（原数据只有3列，所以新增第4列）
+            self.data['converted_timestamp'] = self.data['datetime']
+
+
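+            # 5th column: integer epoch value of the parsed (naive) datetime. NOTE(review): // 10**9 yields seconds if the int64 is nanoseconds, although the column is named *_ms — confirm the intended unit.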
+            # self.data['utc_timestamp_ms'] = (self.data['datetime'].astype('int64') // 10**6)
+            self.data['utc_timestamp_ms'] = (self.data['datetime'].astype('int64') // 10**9)
+
+
+
+            # 保存带时间戳的新CSV文件
+            self._save_csv_with_timestamp()
+
             # 提取处理后的数据
             self.timestamps = self.data['datetime']
             self.temperatures = self.data['temperature']
@@ -83,6 +96,28 @@ class TemperatureDataAnalyzer:
             print(f"数据处理错误: {e}")
             return False
 
+    def _save_csv_with_timestamp(self):
+        """Write the processed rows to a new, uniquely named CSV beside the input.
+
+        The output name is the input's base name plus ``_with_timestamp_`` and
+        the value of ``datetime.now()``; rows are written without index or
+        header. Any failure is printed and otherwise ignored.
+        """
+        try:
+            source_dir, source_name = os.path.split(self.file_path)
+            stem = os.path.splitext(source_name)[0]
+            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_path = os.path.join(source_dir, f"{stem}_with_timestamp_{stamp}.csv")
+
+            # Original three columns followed by the two derived time columns.
+            columns_to_save = ['timestamp', 'temperature', 'status',
+                               'converted_timestamp', 'utc_timestamp_ms']
+            exported = self.data[columns_to_save]
+            exported.to_csv(output_path, index=False, header=False)
+            print(f"已保存带时间戳的新CSV文件: {output_path}")
+        except Exception as e:
+            print(f"保存带时间戳CSV文件时出错: {e}")
+
     def create_scatter_plots(self):
         """创建散点图"""
         fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))