更新 HTML 解析与 Excel 处理中对 cell 编号的解析;在温度数据处理中增加时间戳换算。
This commit is contained in:
@@ -50,6 +50,17 @@ class LogManager:
|
||||
|
||||
def log_statistics(self, statistics_data):
|
||||
"""记录统计信息到日志文件"""
|
||||
|
||||
def mixed_sort_key(item):
    """Build a sort key that orders numeric ids before textual ones.

    The key is a ``(group, value)`` tuple so that values of different
    types are never compared with each other directly:

    * group 0 -- integers, integral floats and digit-only strings,
      compared numerically (so ``"2"`` sorts before ``"10"`` and ``10``);
    * group 1 -- everything else, compared by its ``str()`` form;
    * group 2 -- digit-like strings that ``int()`` cannot parse
      (``str.isdigit()`` accepts characters such as ``"²"``).

    Args:
        item: A key to sort; usually a ``str``, but numeric ids are
            accepted as well.

    Returns:
        tuple: ``(group, value)`` suitable for ``sorted(..., key=...)``.
    """
    # bool is a subclass of int; keep True/False in the textual bucket.
    if isinstance(item, bool):
        return (1, str(item))

    # Real numeric ids must sort numerically, not as "10" < "2".
    if isinstance(item, int):
        return (0, item)
    if isinstance(item, float) and item.is_integer():
        return (0, int(item))

    if isinstance(item, str) and item.isdigit():
        try:
            return (0, int(item))
        except ValueError:
            # isdigit() is true for e.g. superscript digits that int() rejects.
            return (2, item)

    return (1, str(item))
|
||||
|
||||
with open(self.log_file, 'a', encoding='utf-8') as f:
|
||||
f.write("\n" + "=" * 80 + "\n")
|
||||
f.write("处理统计汇总\n")
|
||||
@@ -73,7 +84,9 @@ class LogManager:
|
||||
f.write(f"{"="*30}\n")
|
||||
|
||||
# 按Cell编号排序
|
||||
sorted_cells = sorted(cell_stats.keys(), key=lambda x: int(x) if x.isdigit() else x)
|
||||
# sorted_cells = sorted(cell_stats.keys(), key=lambda x: int(x) if x.isdigit() else x)
|
||||
# 使用修复后的排序方法
|
||||
sorted_cells = sorted(cell_stats.keys(), key=mixed_sort_key)
|
||||
|
||||
for cell in sorted_cells:
|
||||
stats = cell_stats[cell]
|
||||
@@ -92,7 +105,7 @@ class LogManager:
|
||||
sorted_fail_items = sorted(fail_details.items(), key=lambda x: x[1], reverse=True)[:10]
|
||||
for test_name, count in sorted_fail_items:
|
||||
fail_items.append(f"{test_name}({count}次)")
|
||||
if len (sorted_fail_items)>10:
|
||||
if len (fail_details.items())>10:
|
||||
lastitems = sorted(fail_details.items(), key=lambda x: x[1], reverse=True)[10:]
|
||||
messageappend =''
|
||||
for test_name, count in lastitems:
|
||||
@@ -123,7 +136,10 @@ class LogManager:
|
||||
fail_details = stats.get('fail_details', {})
|
||||
|
||||
# Cell列表(逗号分隔)
|
||||
cell_list = ','.join(sorted(cells, key=lambda x: int(x) if x.isdigit() else x))
|
||||
# cell_list = ','.join(sorted(cells, key=lambda x: int(x) if x.isdigit() else x))
|
||||
# 使用修复后的排序方法
|
||||
cell_list = ','.join(sorted(cells, key=mixed_sort_key))
|
||||
|
||||
|
||||
# 失败项详情
|
||||
fail_items = []
|
||||
@@ -292,31 +308,82 @@ class HTMLFileProcessor:
|
||||
|
||||
@staticmethod
|
||||
def _extract_sn_and_cell(soup, filename):
|
||||
"""提取SN号和cell编号"""
|
||||
"""提取SN号和cell编号 - 支持多种来源"""
|
||||
try:
|
||||
sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?'
|
||||
cell_regex = r'\b\d+\b' # 匹配纯数字的cell编号
|
||||
|
||||
# 提取SN
|
||||
# 初始化默认值
|
||||
sn = "UNKNOWN_SN"
|
||||
cell = "UNKNOWN_CELL"
|
||||
|
||||
# 优先从HTML内容中提取
|
||||
if soup is not None:
|
||||
html_text = soup.get_text(" ", strip=True)
|
||||
|
||||
# 方法1: 从Serial Number标签提取SN
|
||||
sn_tag = soup.find('h3', string=re.compile(r'Serial Number:', re.I))
|
||||
if sn_tag:
|
||||
content_match = re.search(rf'\b({sn_regex})\b', sn_tag.get_text(), flags=re.I)
|
||||
if content_match:
|
||||
sn = content_match.group(1)
|
||||
else:
|
||||
html_text = soup.get_text(" ", strip=True)
|
||||
|
||||
# 方法2: 从Test Cell标签提取Cell编号
|
||||
cell_tag = soup.find('h3', string=re.compile(r'Test Cell:', re.I))
|
||||
if cell_tag:
|
||||
cell_content = cell_tag.get_text()
|
||||
cell_match = re.search(rf'\b({cell_regex})\b', cell_content)
|
||||
if cell_match:
|
||||
cell = cell_match.group(1)
|
||||
|
||||
# 方法3: 从整个HTML文本中搜索SN(如果前面没找到)
|
||||
if sn == "UNKNOWN_SN":
|
||||
content_match = re.search(rf'\b({sn_regex})\b', html_text, flags=re.I)
|
||||
if content_match:
|
||||
sn = content_match.group(1)
|
||||
else:
|
||||
|
||||
# 方法4: 从整个HTML文本中搜索Cell编号(如果前面没找到)
|
||||
if cell == "UNKNOWN_CELL":
|
||||
# 搜索包含"cell"或"CELL"的文本段,然后提取数字
|
||||
cell_sections = re.findall(r'(?i)(?:cell|test cell)[^\d]*(\d+)', html_text)
|
||||
if cell_sections:
|
||||
cell = cell_sections[0] # 取第一个匹配的cell编号
|
||||
|
||||
# 如果HTML中没找到,从文件名中提取
|
||||
if sn == "UNKNOWN_SN":
|
||||
content_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I)
|
||||
if content_match:
|
||||
sn = content_match.group(1)
|
||||
|
||||
# 提取cell编号(从文件名末尾的"数字")
|
||||
cell_match = re.search(r'-(\d+)\.html$', filename)
|
||||
cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL"
|
||||
if cell == "UNKNOWN_CELL":
|
||||
# 从文件名中提取cell编号(多种模式)
|
||||
# 模式1: 破折号后、下划线或点号前的数字
|
||||
pattern1 = r'-(\d+)[._]'
|
||||
# 模式2: 包含"cell"或"CELL"后跟数字
|
||||
pattern2 = r'(?i)(?:cell|CELL)[^\d]*(\d+)'
|
||||
# 模式3: 文件名末尾的数字
|
||||
pattern3 = r'(\d+)\.html?$'
|
||||
|
||||
for pattern in [pattern1, pattern2, pattern3]:
|
||||
cell_match = re.search(pattern, filename)
|
||||
if cell_match:
|
||||
cell = cell_match.group(1)
|
||||
break
|
||||
|
||||
# 额外的Cell编号提取逻辑(如果上述方法都没找到)
|
||||
if cell == "UNKNOWN_CELL" and soup is not None:
|
||||
# 尝试从表格或其他结构中提取Cell编号
|
||||
try:
|
||||
# 查找包含"Cell"的表格单元格
|
||||
cell_tds = soup.find_all('td', string=re.compile(r'(?i)cell'))
|
||||
for td in cell_tds:
|
||||
parent_text = td.parent.get_text()
|
||||
cell_match = re.search(rf'\b({cell_regex})\b', parent_text)
|
||||
if cell_match:
|
||||
cell = cell_match.group(1)
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
return sn, cell
|
||||
|
||||
@@ -1147,7 +1214,10 @@ class ParallelHTMLReportProcessor:
|
||||
"""从文件名提取SN和cell编号"""
|
||||
sn_regex = r'F[A-Z0-9]{15}(?:[A-Z0-9]{5,})?'
|
||||
sn_match = re.search(rf'\b({sn_regex})\b', filename, flags=re.I)
|
||||
cell_match = re.search(r'-(\d+)\.html$', filename)
|
||||
# 匹配模式:破折号后、下划线或点号前的数字
|
||||
pattern = r'-(\d+)[._]'
|
||||
# cell_match = re.search(r'-(\d+)\.html$', filename)
|
||||
cell_match = re.search(pattern, filename)
|
||||
|
||||
sn = sn_match.group(1) if sn_match else "UNKNOWN_SN"
|
||||
cell = cell_match.group(1) if cell_match else "UNKNOWN_CELL"
|
||||
@@ -1572,11 +1642,15 @@ if __name__ == "__main__":
|
||||
# 关键:防止打包环境下的重复执行
|
||||
mp.freeze_support()
|
||||
|
||||
# 额外保护:确保只在主进程中执行
|
||||
if mp.current_process().name == 'MainProcess':
|
||||
main()
|
||||
input(f"输入任意结束程序......")
|
||||
else:
|
||||
# 子进程不需要执行任何交互代码
|
||||
pass
|
||||
try:
|
||||
# 额外保护:确保只在主进程中执行
|
||||
if mp.current_process().name == 'MainProcess':
|
||||
main()
|
||||
input(f"输入任意结束程序......")
|
||||
else:
|
||||
# 子进程不需要执行任何交互代码
|
||||
pass
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print(f"用户中断程序")
|
||||
|
||||
|
||||
@@ -977,7 +977,7 @@ class MultiFileTestReportScatterPlotter:
|
||||
print(f"警告:发现 {failed_conversions} 个 'Cell' 值无法转换为数字,这些条目将保留为字符串或根据业务逻辑处理。")
|
||||
# 业务决策:对于无法转换的,可以保留原字符串,或使用一个默认值
|
||||
# 例如,将无法转换的条目其数值编号设为-1或一个特定的标识值
|
||||
# test_data.loc[test_data['Cell编号_数值'].isna(), 'Cell编号_数值'] = -1
|
||||
test_data.loc[test_data['Cell编号_数值'].isna(), 'Cell编号_数值'] = -1
|
||||
|
||||
# 此时,您可以根据需求选择使用 'Cell编号'(字符串)或 'Cell编号_数值'(数字)进行后续分组和可视化
|
||||
# 对于绘图着色和排序,使用 'Cell编号_数值' 列
|
||||
|
||||
@@ -27,10 +27,10 @@ class TemperatureDataAnalyzer:
|
||||
# 常见中文字体候选(跨平台)
|
||||
candidates = [
|
||||
"Microsoft YaHei", "Microsoft YaHei UI", # Windows
|
||||
"SimHei", "SimSun", # Windows(黑体/宋体)
|
||||
"PingFang SC", "Heiti SC", # macOS
|
||||
"SimHei", "SimSun", # Windows(黑体/宋体)
|
||||
"PingFang SC", "Heiti SC", # macOS
|
||||
"Noto Sans CJK SC", "Source Han Sans SC", "WenQuanYi Micro Hei", # Linux
|
||||
"Arial Unicode MS" # 覆盖广的 Unicode 字体
|
||||
"Arial Unicode MS" # 覆盖广的 Unicode 字体
|
||||
]
|
||||
available = {f.name for f in font_manager.fontManager.ttflist}
|
||||
for name in candidates:
|
||||
@@ -60,7 +60,7 @@ class TemperatureDataAnalyzer:
|
||||
return True
|
||||
|
||||
def load_and_process_data(self):
|
||||
"""加载和处理数据"""
|
||||
"""加载和处理数据,并保存带时间戳的新文件"""
|
||||
try:
|
||||
# 读取CSV文件,无表头
|
||||
self.data = pd.read_csv(self.file_path, header=None)
|
||||
@@ -71,6 +71,19 @@ class TemperatureDataAnalyzer:
|
||||
# 转换时间戳格式(文本例如:10/29/2025 2:20:41 PM)
|
||||
self.data['datetime'] = pd.to_datetime(self.data['timestamp'], format='%m/%d/%Y %I:%M:%S %p')
|
||||
|
||||
# 将转换后的datetime对象存储到D列(原数据只有3列,所以新增第4列)
|
||||
self.data['converted_timestamp'] = self.data['datetime']
|
||||
|
||||
|
||||
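# Column 5: Unix epoch timestamp in whole seconds (nanoseconds // 10**9), despite the "_ms" column name; the naive datetimes are treated as if they were UTC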
|
||||
# self.data['utc_timestamp_ms'] = (self.data['datetime'].astype('int64') // 10**6)
|
||||
self.data['utc_timestamp_ms'] = (self.data['datetime'].astype('int64') // 10**9)
|
||||
|
||||
|
||||
|
||||
# 保存带时间戳的新CSV文件
|
||||
self._save_csv_with_timestamp()
|
||||
|
||||
# 提取处理后的数据
|
||||
self.timestamps = self.data['datetime']
|
||||
self.temperatures = self.data['temperature']
|
||||
@@ -83,6 +96,28 @@ class TemperatureDataAnalyzer:
|
||||
print(f"数据处理错误: {e}")
|
||||
return False
|
||||
|
||||
def _save_csv_with_timestamp(self):
    """Export the processed data to a new CSV file next to the input file.

    The output name is ``<input stem>_with_timestamp_<YYYYmmdd_HHMMSS>.csv``,
    where the suffix is the current local time. The file is written
    without a header row and without the index. Any failure is reported
    on stdout and swallowed; nothing is returned.
    """
    # Original three columns plus the converted datetime and epoch columns.
    export_columns = [
        'timestamp',
        'temperature',
        'status',
        'converted_timestamp',
        'utc_timestamp_ms',
    ]

    try:
        # Split once into directory and file name, then drop the extension.
        source_dir, source_name = os.path.split(self.file_path)
        base_filename = os.path.splitext(source_name)[0]

        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(
            source_dir,
            f"{base_filename}_with_timestamp_{timestamp_str}.csv",
        )

        export_frame = self.data[export_columns]
        export_frame.to_csv(output_path, index=False, header=False)

        print(f"已保存带时间戳的新CSV文件: {output_path}")
    except Exception as e:
        print(f"保存带时间戳CSV文件时出错: {e}")
|
||||
|
||||
def create_scatter_plots(self):
|
||||
"""创建散点图"""
|
||||
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
|
||||
|
||||
Reference in New Issue
Block a user