Python脚本开发文件初始化
This commit is contained in:
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
/.idea/*
|
||||
.idea/*
|
||||
/.idea
|
||||
.idea
|
||||
/.venv
|
||||
/.venv/*
|
||||
|
||||
16
BOMCompare/.gitignore
vendored
Normal file
16
BOMCompare/.gitignore
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
|
||||
BOMCompare for Merge V2.py
|
||||
|
||||
BOMCompareForJP2.py
|
||||
|
||||
|
||||
BOMConsolidator.py
|
||||
|
||||
BOMConsolidatorV2.py
|
||||
# BOMConsolidator.py
|
||||
655
BOMCompare/BOMCompare for Merge V1.py
Normal file
655
BOMCompare/BOMCompare for Merge V1.py
Normal file
@@ -0,0 +1,655 @@
|
||||
import pandas as pd
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
from datetime import datetime
|
||||
import os
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
|
||||
class BOMComparator:
|
||||
"""BOM文件差异对比器"""
|
||||
|
||||
def __init__(self):
|
||||
self.file1_path = ""
|
||||
self.file2_path = ""
|
||||
self.file1_sheets = []
|
||||
self.file2_sheets = []
|
||||
self.common_sheets = []
|
||||
self.differences = {}
|
||||
self.file1_name = ""
|
||||
self.file2_name = ""
|
||||
self.columns_to_exclude = ['检查信息', '检查状态', '校验信息'] # 要排除的列名
|
||||
|
||||
def select_file(self, title: str) -> str:
|
||||
"""手动选择文件"""
|
||||
root = tk.Tk()
|
||||
root.withdraw()
|
||||
file_path = filedialog.askopenfilename(
|
||||
title=title,
|
||||
filetypes=[("Excel files", "*.xlsx"), ("All files", "*.*")]
|
||||
)
|
||||
root.destroy()
|
||||
return file_path
|
||||
|
||||
def find_valid_sheets(self, file_path: str) -> List[str]:
|
||||
"""参考附件3的方式查找有效的sheet"""
|
||||
valid_sheets = []
|
||||
|
||||
try:
|
||||
xl_file = pd.ExcelFile(file_path)
|
||||
|
||||
for sheet_name in xl_file.sheet_names:
|
||||
try:
|
||||
# 尝试读取sheet,检查是否包含BOM数据
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name, nrows=10)
|
||||
|
||||
# 检查是否包含BOM相关列(参考附件结构)
|
||||
required_columns = ['Partnumber', 'Purchase_Code', 'MF_PN', 'Description']
|
||||
found_columns = [col for col in df.columns if col in required_columns]
|
||||
|
||||
if len(found_columns) >= 2: # 至少找到2个关键列
|
||||
# 检查是否有实际数据(不只是表头)
|
||||
if len(df) > 1:
|
||||
valid_sheets.append(sheet_name)
|
||||
|
||||
except Exception as e:
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"读取文件 {file_path} 时出错: {e}")
|
||||
|
||||
return valid_sheets
|
||||
|
||||
def get_common_sheets(self) -> List[str]:
|
||||
"""获取两个文件的共同工作表"""
|
||||
if not self.file1_sheets or not self.file2_sheets:
|
||||
return []
|
||||
|
||||
# 标准化工作表名称(去除空格和特殊字符)
|
||||
file1_clean = [self.standardize_sheet_name(sheet) for sheet in self.file1_sheets]
|
||||
file2_clean = [self.standardize_sheet_name(sheet) for sheet in self.file2_sheets]
|
||||
|
||||
# 找出共同的工作表
|
||||
common_sheets = []
|
||||
for sheet1 in self.file1_sheets:
|
||||
clean_sheet1 = self.standardize_sheet_name(sheet1)
|
||||
for sheet2 in self.file2_sheets:
|
||||
clean_sheet2 = self.standardize_sheet_name(sheet2)
|
||||
if clean_sheet1 == clean_sheet2:
|
||||
common_sheets.append(sheet1)
|
||||
break
|
||||
|
||||
return common_sheets
|
||||
|
||||
def standardize_sheet_name(self, sheet_name: str) -> str:
|
||||
"""标准化工作表名称,便于比较"""
|
||||
return str(sheet_name).strip().lower().replace(' ', '_').replace('-', '_')
|
||||
|
||||
def load_bom_data(self, file_path: str, sheet_name: str) -> pd.DataFrame:
|
||||
"""加载BOM数据"""
|
||||
try:
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
# 清理数据:去除空行和空列
|
||||
df = df.dropna(how='all').dropna(axis=1, how='all')
|
||||
|
||||
# 清理列名
|
||||
df.columns = df.columns.str.strip()
|
||||
|
||||
return df
|
||||
except Exception as e:
|
||||
print(f"加载sheet {sheet_name} 时出错: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
def should_compare_column(self, column_name: str) -> bool:
|
||||
"""判断是否应该对比该列(排除检查信息类列)"""
|
||||
exclude_keywords = ['检查', '校验', '状态', '备注', 'comment', 'check']
|
||||
column_lower = str(column_name).lower()
|
||||
|
||||
# 检查是否在排除列表中
|
||||
if column_name in self.columns_to_exclude:
|
||||
return False
|
||||
|
||||
# 检查是否包含排除关键词
|
||||
for keyword in exclude_keywords:
|
||||
if keyword in column_lower:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def get_columns_to_compare(self, df1: pd.DataFrame, df2: pd.DataFrame) -> List[str]:
|
||||
"""获取需要对比的列名(排除检查信息类列)"""
|
||||
common_columns = list(set(df1.columns).intersection(set(df2.columns)))
|
||||
|
||||
# 过滤掉不需要对比的列
|
||||
columns_to_compare = [col for col in common_columns if self.should_compare_column(col)]
|
||||
|
||||
return columns_to_compare
|
||||
|
||||
    def compare_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame, sheet_name1: str, sheet_name2: str) -> Dict:
        """Diff two BOM DataFrames, excluding audit-style columns.

        Rows are aligned on key columns chosen by identify_key_columns().
        Returns a dict with 'added_rows', 'removed_rows', 'modified_rows',
        'columns_comparison', 'summary' counters, copies of both inputs,
        and an 'error' key when the diff could not be performed.
        """
        differences = {
            'sheet_names': f"{sheet_name1} vs {sheet_name2}",
            'added_rows': [],
            'removed_rows': [],
            'modified_rows': [],
            'columns_comparison': {},
            'summary': {
                'total_rows_df1': len(df1),
                'total_rows_df2': len(df2),
                'added_count': 0,
                'removed_count': 0,
                'modified_count': 0
            },
            'original_dfs': {
                'df1': df1.copy(),
                'df2': df2.copy()
            }
        }

        # Determine which column(s) identify a row across the two frames.
        key_columns = self.identify_key_columns(df1, df2)

        if not key_columns:
            differences['error'] = "无法确定用于对比的关键列"
            return differences

        try:
            # Index both frames on the key columns so rows can be aligned.
            df1_indexed = df1.set_index(key_columns)
            df2_indexed = df2.set_index(key_columns)

            # Columns eligible for cell-level comparison (audit columns excluded).
            columns_to_compare = self.get_columns_to_compare(df1, df2)

            # Rows present only in file 2 -> added.
            new_indexes = df2_indexed.index.difference(df1_indexed.index)
            if len(new_indexes) > 0:
                differences['added_rows'] = df2_indexed.loc[new_indexes].reset_index().to_dict('records')
                differences['summary']['added_count'] = len(new_indexes)

            # Rows present only in file 1 -> removed.
            removed_indexes = df1_indexed.index.difference(df2_indexed.index)
            if len(removed_indexes) > 0:
                differences['removed_rows'] = df1_indexed.loc[removed_indexes].reset_index().to_dict('records')
                differences['summary']['removed_count'] = len(removed_indexes)

            # Rows present in both: compare each eligible column cell-by-cell.
            common_indexes = df1_indexed.index.intersection(df2_indexed.index)

            for idx in common_indexes:
                row1 = df1_indexed.loc[idx]
                row2 = df2_indexed.loc[idx]

                modified_cols = {}
                for col in columns_to_compare:
                    if col in df1_indexed.columns and col in df2_indexed.columns:
                        val1 = row1[col]
                        val2 = row2[col]

                        # Two NaNs count as equal; otherwise compare string forms.
                        if pd.isna(val1) and pd.isna(val2):
                            continue
                        elif pd.isna(val1) or pd.isna(val2) or str(val1) != str(val2):
                            modified_cols[col] = {
                                'old_value': val1,
                                'new_value': val2
                            }

                if modified_cols:
                    # Build the merged display view of the full row for the report.
                    full_row_data = self.get_full_row_data_for_display(df1, df2, idx, key_columns)

                    differences['modified_rows'].append({
                        'key_values': dict(zip(key_columns, idx)) if isinstance(idx, tuple) else {key_columns[0]: idx},
                        'modified_columns': modified_cols,
                        'full_row_data': full_row_data
                    })
                    differences['summary']['modified_count'] += 1

            # Column-level statistics (all columns, for reporting).
            common_columns = set(df1.columns).intersection(set(df2.columns))
            df1_only_columns = set(df1.columns).difference(set(df2.columns))
            df2_only_columns = set(df2.columns).difference(set(df1.columns))

            # Split shared columns into compared vs excluded (audit) columns.
            compared_columns = set(columns_to_compare)
            excluded_columns = common_columns - compared_columns

            differences['columns_comparison'] = {
                'common_columns': list(common_columns),
                'compared_columns': list(compared_columns),
                'excluded_columns': list(excluded_columns),
                'file1_only_columns': list(df1_only_columns),
                'file2_only_columns': list(df2_only_columns)
            }

        except Exception as e:
            differences['error'] = f"对比过程中出错: {str(e)}"

        return differences
||||
def get_full_row_data_for_display(self, df1: pd.DataFrame, df2: pd.DataFrame, idx, key_columns: List[str]) -> Dict:
|
||||
"""获取完整的行数据用于显示"""
|
||||
display_data = {}
|
||||
|
||||
# 获取两个文件中的对应行数据
|
||||
row1_data = self.extract_row_data(df1, idx, key_columns)
|
||||
row2_data = self.extract_row_data(df2, idx, key_columns)
|
||||
|
||||
# 定义需要显示的列(排除检查信息类列)
|
||||
display_columns = ['Purchase_Code', 'MF_PN', 'Description', 'Part Type', 'MF_NAME', 'PCB_Footprint', '合计']
|
||||
|
||||
# 过滤掉检查信息类列
|
||||
display_columns = [col for col in display_columns if self.should_compare_column(col)]
|
||||
|
||||
for col in display_columns:
|
||||
val1 = row1_data.get(col, '')
|
||||
val2 = row2_data.get(col, '')
|
||||
|
||||
# 格式化显示:有差异显示原值->新值,无差异显示原值
|
||||
if pd.isna(val1) or val1 == '':
|
||||
display_value = val2
|
||||
elif pd.isna(val2) or val2 == '':
|
||||
display_value = val1
|
||||
elif str(val1) != str(val2):
|
||||
display_value = f"{val1} -> {val2}"
|
||||
else:
|
||||
display_value = val1
|
||||
|
||||
display_data[col] = display_value
|
||||
|
||||
# 添加文件来源信息
|
||||
display_data['_from_file1'] = row1_data
|
||||
display_data['_from_file2'] = row2_data
|
||||
|
||||
return display_data
|
||||
|
||||
def extract_row_data(self, df: pd.DataFrame, idx, key_columns: List[str]) -> Dict:
|
||||
"""从DataFrame中提取指定行的数据"""
|
||||
row_data = {}
|
||||
|
||||
try:
|
||||
if isinstance(idx, tuple):
|
||||
# 多列索引的情况
|
||||
mask = pd.Series(True, index=df.index)
|
||||
for i, key in enumerate(key_columns):
|
||||
mask = mask & (df[key] == idx[i])
|
||||
if mask.any():
|
||||
original_row = df[mask].iloc[0]
|
||||
for col in df.columns:
|
||||
row_data[col] = original_row[col]
|
||||
else:
|
||||
# 单列索引的情况
|
||||
matching_rows = df[df[key_columns[0]] == idx]
|
||||
if len(matching_rows) > 0:
|
||||
original_row = matching_rows.iloc[0]
|
||||
for col in df.columns:
|
||||
row_data[col] = original_row[col]
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
return row_data
|
||||
|
||||
def format_value_display(self, value1, value2):
|
||||
"""格式化值的显示:有差异显示原值->新值,无差异显示原值"""
|
||||
if pd.isna(value1) or value1 == '':
|
||||
return value2
|
||||
elif pd.isna(value2) or value2 == '':
|
||||
return value1
|
||||
elif str(value1) != str(value2):
|
||||
return f"{value1} -> {value2}"
|
||||
else:
|
||||
return value1
|
||||
|
||||
def get_modified_columns_summary(self, modified_columns: Dict) -> str:
|
||||
"""获取修改列的概要汇总"""
|
||||
if not modified_columns:
|
||||
return "无修改"
|
||||
|
||||
modified_list = list(modified_columns.keys())
|
||||
|
||||
# 如果修改列数量较少,直接显示
|
||||
if len(modified_list) <= 3:
|
||||
return ", ".join(modified_list)
|
||||
else:
|
||||
# 数量较多时显示前3个加省略号
|
||||
return ", ".join(modified_list[:3]) + f"...等{len(modified_list)}列"
|
||||
|
||||
def identify_key_columns(self, df1: pd.DataFrame, df2: pd.DataFrame) -> List[str]:
|
||||
"""识别用于行匹配的关键列"""
|
||||
# 优先使用Partnumber作为关键列
|
||||
potential_keys = ['Partnumber', 'Purchase_Code', 'MF_PN']
|
||||
|
||||
for key in potential_keys:
|
||||
if key in df1.columns and key in df2.columns:
|
||||
# 检查该列是否适合作为关键列(不应有过多重复值)
|
||||
df1_dup_rate = df1[key].duplicated().sum() / len(df1)
|
||||
df2_dup_rate = df2[key].duplicated().sum() / len(df2)
|
||||
|
||||
if df1_dup_rate < 0.1 and df2_dup_rate < 0.1: # 允许少量重复
|
||||
return [key]
|
||||
|
||||
# 如果没有单一关键列,尝试组合
|
||||
for key_combo in [['Partnumber', 'MF_PN'], ['Purchase_Code', 'MF_PN']]:
|
||||
if all(col in df1.columns for col in key_combo) and all(col in df2.columns for col in key_combo):
|
||||
return key_combo
|
||||
|
||||
# 最后尝试使用所有找到的共同列
|
||||
common_cols = list(set(df1.columns).intersection(set(df2.columns)))
|
||||
if common_cols:
|
||||
return common_cols[:2] # 最多使用前两列
|
||||
|
||||
return []
|
||||
|
||||
def generate_output_filename(self) -> str:
|
||||
"""生成输出文件名,以两个文件的有效sheet名称开头"""
|
||||
if not self.file1_sheets or not self.file2_sheets:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
return f"BOM差异报告_{timestamp}.xlsx"
|
||||
|
||||
# 使用第一个文件第一个sheet和第二个文件第一个sheet
|
||||
file1_sheet_name = str(self.file1_sheets[0]) if self.file1_sheets else "File1"
|
||||
file2_sheet_name = str(self.file2_sheets[0]) if self.file2_sheets else "File2"
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
# 清理sheet名称中的特殊字符
|
||||
clean_sheet1 = self.clean_filename(file1_sheet_name)
|
||||
clean_sheet2 = self.clean_filename(file2_sheet_name)
|
||||
|
||||
filename = f"{clean_sheet1}_vs_{clean_sheet2}_差异报告_{timestamp}.xlsx"
|
||||
|
||||
return filename
|
||||
|
||||
def clean_filename(self, filename: str) -> str:
|
||||
"""清理文件名中的特殊字符"""
|
||||
filename = str(filename)
|
||||
|
||||
# 移除Windows文件名中不允许的字符
|
||||
invalid_chars = '<>:"/\\|?*'
|
||||
for char in invalid_chars:
|
||||
filename = filename.replace(char, '_')
|
||||
|
||||
# 移除多余的空格和特殊字符
|
||||
filename = filename.replace(' ', '_')
|
||||
filename = filename.replace('\t', '_')
|
||||
filename = filename.replace('\n', '_')
|
||||
|
||||
# 限制文件名长度
|
||||
if len(filename) > 50:
|
||||
filename = filename[:50]
|
||||
|
||||
return filename
|
||||
|
||||
def clean_sheet_name(self, sheet_name: str, max_length: int = 25) -> str:
|
||||
"""清理工作表名称,确保符合Excel工作表名称限制"""
|
||||
sheet_name = str(sheet_name)
|
||||
|
||||
# 移除Excel工作表名称中不允许的字符
|
||||
invalid_chars = '[]:*?/\\'
|
||||
for char in invalid_chars:
|
||||
sheet_name = sheet_name.replace(char, '_')
|
||||
|
||||
# 限制工作表名称长度(Excel限制为31个字符)
|
||||
if len(sheet_name) > max_length:
|
||||
sheet_name = sheet_name[:max_length]
|
||||
|
||||
return sheet_name
|
||||
|
||||
def get_output_directory(self) -> str:
|
||||
"""获取输出目录(第二个文件所在目录)"""
|
||||
return os.path.dirname(self.file2_path)
|
||||
|
||||
    def generate_difference_report(self) -> str:
        """Write all collected differences to a multi-sheet Excel report.

        Produces an overview sheet plus, per comparison, a summary sheet and
        optional added/removed/modified detail sheets.

        Returns:
            The output file path on success, "没有发现差异" when there is
            nothing to report, or "" when writing fails.
        """
        if not self.differences:
            return "没有发现差异"

        # Output goes next to the second file, named after both sheets.
        output_filename = self.generate_output_filename()
        output_directory = self.get_output_directory()
        output_path = os.path.join(output_directory, output_filename)

        try:
            with pd.ExcelWriter(output_path, engine='openpyxl') as writer:

                # Overview sheet: one row per successful comparison.
                summary_data = []
                for diff_key, differences in self.differences.items():
                    if 'error' not in differences:
                        columns_comparison = differences.get('columns_comparison', {})
                        excluded_count = len(columns_comparison.get('excluded_columns', []))

                        summary_data.append([
                            differences.get('sheet_names', diff_key),
                            differences['summary']['total_rows_df1'],
                            differences['summary']['total_rows_df2'],
                            differences['summary']['added_count'],
                            differences['summary']['removed_count'],
                            differences['summary']['modified_count'],
                            excluded_count
                        ])

                if summary_data:
                    summary_df = pd.DataFrame(summary_data, columns=[
                        '工作表对比', '文件1行数', '文件2行数', '新增行数', '删除行数', '修改行数', '排除列数'
                    ])
                    summary_df.to_excel(writer, sheet_name='对比摘要', index=False)

                # Detailed sheets for each comparison.
                for diff_key, differences in self.differences.items():
                    sheet_key = self.clean_sheet_name(diff_key.replace('vs', '_vs_'))

                    if 'error' in differences:
                        # Failed comparison: emit an error sheet and move on.
                        error_df = pd.DataFrame([['错误信息', differences['error']]])
                        error_df.to_excel(writer, sheet_name=f"{sheet_key}_错误", index=False, header=False)
                        continue

                    # Per-comparison summary including column statistics.
                    summary_data = []
                    summary_data.append(["对比项", "数量"])
                    summary_data.append(["文件1总行数", differences['summary']['total_rows_df1']])
                    summary_data.append(["文件2总行数", differences['summary']['total_rows_df2']])
                    summary_data.append(["新增行数", differences['summary']['added_count']])
                    summary_data.append(["删除行数", differences['summary']['removed_count']])
                    summary_data.append(["修改行数", differences['summary']['modified_count']])
                    summary_data.append(["共同列数", len(differences['columns_comparison']['common_columns'])])
                    summary_data.append(["实际对比列数", len(differences['columns_comparison']['compared_columns'])])
                    summary_data.append(["排除列数", len(differences['columns_comparison']['excluded_columns'])])
                    summary_data.append(["文件1特有列", len(differences['columns_comparison']['file1_only_columns'])])
                    summary_data.append(["文件2特有列", len(differences['columns_comparison']['file2_only_columns'])])

                    # List the columns that were excluded from the diff.
                    excluded_cols = differences['columns_comparison'].get('excluded_columns', [])
                    if excluded_cols:
                        summary_data.append(["", ""])
                        summary_data.append(["排除的列", "(检查信息类列不参与对比)"])
                        for col in excluded_cols:
                            summary_data.append(["", f"- {col}"])

                    pd.DataFrame(summary_data).to_excel(
                        writer,
                        sheet_name=f"{sheet_key}_汇总",
                        index=False,
                        header=False
                    )

                    # Added-rows detail sheet.
                    if differences['added_rows']:
                        pd.DataFrame(differences['added_rows']).to_excel(
                            writer,
                            sheet_name=f"{sheet_key}_新增行",
                            index=False
                        )

                    # Removed-rows detail sheet.
                    if differences['removed_rows']:
                        pd.DataFrame(differences['removed_rows']).to_excel(
                            writer,
                            sheet_name=f"{sheet_key}_删除行",
                            index=False
                        )

                    # Modified-rows detail sheet with merged old/new display
                    # values (audit columns excluded).
                    if differences['modified_rows']:
                        modified_data = []

                        for mod_row in differences['modified_rows']:
                            # Base record: key columns plus a change summary.
                            record = {
                                **mod_row['key_values'],
                                '修改列': self.get_modified_columns_summary(mod_row['modified_columns'])
                            }

                            # Merged display values for every reportable column.
                            display_data = mod_row.get('full_row_data', {})

                            display_columns = list(display_data.keys())
                            display_columns = [col for col in display_columns if
                                               not col.startswith('_') and self.should_compare_column(col)]

                            for col in display_columns:
                                record[col] = display_data.get(col, '')

                            # Explicit "old -> new" detail for each compared column.
                            for col, values in mod_row['modified_columns'].items():
                                if self.should_compare_column(col):
                                    record[f'详细_{col}'] = f"{values['old_value']} -> {values['new_value']}"

                            modified_data.append(record)

                        if modified_data:
                            modified_df = pd.DataFrame(modified_data)

                            # Column ordering: keys, change summary, display
                            # columns, then the detailed change columns.
                            column_order = list(mod_row['key_values'].keys()) + ['修改列']

                            other_columns = [col for col in modified_df.columns
                                             if col not in column_order and not col.startswith('详细_')]
                            column_order.extend(other_columns)

                            detailed_cols = [col for col in modified_df.columns if col.startswith('详细_')]
                            column_order.extend(detailed_cols)

                            # Keep only columns that actually exist in the frame.
                            existing_columns = [col for col in column_order if col in modified_df.columns]
                            modified_df = modified_df[existing_columns]

                            modified_df.to_excel(
                                writer,
                                sheet_name=f"{sheet_key}_修改行",
                                index=False
                            )

            return output_path

        except Exception as e:
            print(f"生成报告时出错: {e}")
            return ""
||||
    def run_comparison(self):
        """Run the full interactive workflow.

        Steps: pick both Excel files via dialogs, discover BOM-like sheets,
        compare the first valid sheet of each file, and write the Excel
        difference report. Prints progress to the console; returns None.
        """
        print("=== BOM文件差异对比工具 ===")
        print("注意:检查信息类列(如'检查信息')将不参与修改行对比")

        # Step 1: pick the first workbook.
        print("\n步骤1: 选择第一份Excel文件")
        self.file1_path = self.select_file("选择第一份BOM Excel文件")
        if not self.file1_path:
            print("未选择文件,程序退出")
            return

        self.file1_name = os.path.basename(self.file1_path)

        # Step 2: pick the second workbook.
        print("\n步骤2: 选择第二份Excel文件")
        self.file2_path = self.select_file("选择第二份BOM Excel文件")
        if not self.file2_path:
            print("未选择文件,程序退出")
            return

        self.file2_name = os.path.basename(self.file2_path)

        print(f"\n文件1: {self.file1_name}")
        print(f"文件2: {self.file2_name}")

        # Step 3: discover BOM-like sheets in both workbooks.
        print("\n步骤3: 查找有效的工作表...")
        self.file1_sheets = self.find_valid_sheets(self.file1_path)
        self.file2_sheets = self.find_valid_sheets(self.file2_path)

        print(f"文件1的有效工作表: {self.file1_sheets}")
        print(f"文件2的有效工作表: {self.file2_sheets}")

        if not self.file1_sheets or not self.file2_sheets:
            print("至少有一个文件没有有效的工作表,无法进行对比")
            return

        # Step 4: diff the first valid sheet of each file.
        print("\n步骤4: 进行差异对比...")
        self.differences = {}

        sheet1 = self.file1_sheets[0]
        sheet2 = self.file2_sheets[0]

        print(f"正在对比: {sheet1} (文件1) vs {sheet2} (文件2)")

        df1 = self.load_bom_data(self.file1_path, sheet1)
        df2 = self.load_bom_data(self.file2_path, sheet2)

        if df1.empty:
            print(f"  ⚠ 文件1的工作表 {sheet1} 数据加载失败")
            return

        if df2.empty:
            print(f"  ⚠ 文件2的工作表 {sheet2} 数据加载失败")
            return

        differences = self.compare_dataframes(df1, df2, sheet1, sheet2)
        comparison_key = f"{sheet1}_vs_{sheet2}"
        self.differences[comparison_key] = differences

        if 'error' in differences:
            print(f"  ⚠ 对比过程中出错: {differences['error']}")
        else:
            columns_comparison = differences.get('columns_comparison', {})
            excluded_count = len(columns_comparison.get('excluded_columns', []))

            print(f"  √ 完成对比:")
            print(f"    文件1行数: {differences['summary']['total_rows_df1']}")
            print(f"    文件2行数: {differences['summary']['total_rows_df2']}")
            print(f"    新增行数: {differences['summary']['added_count']}")
            print(f"    删除行数: {differences['summary']['removed_count']}")
            print(f"    修改行数: {differences['summary']['modified_count']}")
            print(f"    排除列数: {excluded_count} (检查信息类列不参与对比)")

        # Step 5: write the Excel difference report.
        print("\n步骤5: 生成差异报告...")
        output_file = self.generate_difference_report()

        if output_file and os.path.exists(output_file):
            print(f"\n=== 对比完成 ===")
            print(f"差异报告已生成: {os.path.basename(output_file)}")
            # print(f"文件位置: {output_file}")
            print(f"输出目录: {self.get_output_directory()}")
        else:
            print("未成功生成差异报告")
|
||||
def main():
    """Entry point: run one interactive comparison session, then pause."""
    BOMComparator().run_comparison()

    # Keep the console window open until the user acknowledges.
    input("\n按Enter键退出...")
|
||||
# Run the tool only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()
||||
635
BOMCompare/BOMCompareForJP1.py
Normal file
635
BOMCompare/BOMCompareForJP1.py
Normal file
@@ -0,0 +1,635 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
from datetime import datetime
|
||||
import warnings
|
||||
import re
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.utils.dataframe import dataframe_to_rows
|
||||
|
||||
# openpyxl emits noisy UserWarnings (e.g. about unsupported workbook
# features); silence them for cleaner console output.
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')
|
||||
|
||||
|
||||
class BOMComparator:
|
||||
def __init__(self):
|
||||
self.column_mapping = {
|
||||
'ITEM': 'Partnumber',
|
||||
'HT PN': 'Partnumber',
|
||||
'MF PN': 'MF_PN',
|
||||
'MFG': 'MF_NAME',
|
||||
'CRD': 'Reference',
|
||||
'Description': 'Description',
|
||||
'Qty': 'Quantity',
|
||||
'焊接方式': '焊接方式',
|
||||
'Remark': '备注'
|
||||
}
|
||||
self.ignore_columns = ['备注']
|
||||
self.required_columns = list(self.column_mapping.values())
|
||||
self.change_columns = [
|
||||
'ITEM', 'HT PN', 'MF PN', 'MFG', 'CRD', 'Description', 'Qty', 'Remark'
|
||||
]
|
||||
self.mandatory_keywords = ['item', 'partnumber', 'mfpn']
|
||||
|
||||
# 异常记录
|
||||
self.validation_errors = []
|
||||
|
||||
self.stats = {
|
||||
'old_bom_rows': 0,
|
||||
'new_bom_rows': 0,
|
||||
'changed_items': 0,
|
||||
'added_items': 0,
|
||||
'removed_items': 0,
|
||||
'total_errors': 0
|
||||
}
|
||||
|
||||
def normalize_text(self, text):
|
||||
if pd.isna(text):
|
||||
return ""
|
||||
text = str(text)
|
||||
text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
||||
return text.strip().lower()
|
||||
|
||||
def find_header_row(self, df):
|
||||
print(f"扫描前 {min(20, len(df))} 行寻找标题行...")
|
||||
for i in range(min(20, len(df))):
|
||||
row_values = [self.normalize_text(cell) for cell in df.iloc[i].values]
|
||||
|
||||
contains_all_keywords = True
|
||||
for keyword in self.mandatory_keywords:
|
||||
if not any(keyword in cell_value for cell_value in row_values):
|
||||
contains_all_keywords = False
|
||||
break
|
||||
|
||||
if contains_all_keywords:
|
||||
print(f"✅ 找到有效标题行 (索引 {i}),包含所有必需关键词")
|
||||
return i
|
||||
|
||||
error_msg = (
|
||||
"❌ 未找到有效的标题行:所有标题行必须同时包含以下关键词:\n"
|
||||
f"- Item (或类似表述)\n"
|
||||
f"- Partnumber (或类似表述)\n"
|
||||
f"- MF_PN (或类似表述)\n\n"
|
||||
"在文件的前20行中没有找到同时包含所有关键词的行。"
|
||||
)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
    def find_active_sheet(self, file_path):
        """Pick the sheet most likely to hold BOM data.

        Order of preference: (1) sheets whose name contains 'bom' or 'pcba'
        and whose first 20 rows contain a valid header row; (2) any sheet
        with a valid header row; (3) the workbook's first sheet.

        Returns:
            The chosen sheet name, or None when no sheet has a valid header.
        """
        print(f"扫描文件: {os.path.basename(file_path)}")
        xls = pd.ExcelFile(file_path)

        candidate_sheets = []
        for sheet_name in xls.sheet_names:
            # Candidate sheets are matched by 'bom' or 'pcba' in the name.
            if any(keyword in sheet_name.lower() for keyword in ["bom", "pcba"]):
                candidate_sheets.append(sheet_name)
                print(f"  发现候选Sheet: {sheet_name} - 关键词匹配")

        # Pass 1: try each keyword-matched candidate until one has a header.
        successful_sheet = None
        if candidate_sheets:

            for first_candidate in candidate_sheets:
                try:
                    print(f"  优先检查候选Sheet: {first_candidate}")
                    df_preview = pd.read_excel(
                        file_path,
                        sheet_name=first_candidate,
                        header=None,
                        nrows=20,
                        engine='openpyxl'
                    )
                    # Raises ValueError when no header row is found.
                    header_row_idx = self.find_header_row(df_preview)
                    print(f"✅ 在候选Sheet '{first_candidate}' 中找到标题行")
                    successful_sheet = first_candidate
                    break
                except Exception as e:
                    print(f"  ❌ 优先候选Sheet '{first_candidate}': {str(e)}")
                    continue
        if successful_sheet:
            return successful_sheet

        # Pass 2: no usable keyword candidate — widen to every sheet.
        if not successful_sheet:
            candidate_sheets = xls.sheet_names
            print("  未找到名称包含'BOM'的Sheet,将检查所有Sheet")

        for sheet_name in candidate_sheets:
            try:
                print(f"  检查Sheet: {sheet_name}")
                df_preview = pd.read_excel(
                    file_path,
                    sheet_name=sheet_name,
                    header=None,
                    nrows=20,
                    engine='openpyxl'
                )

                try:
                    header_row_idx = self.find_header_row(df_preview)
                    print(f"✅ 在Sheet '{sheet_name}' 中找到标题行")
                    return sheet_name
                except ValueError as e:
                    # No header in this sheet: try the next one.
                    print(f"  ❌ Sheet '{sheet_name}': {str(e)}")
                    continue
            except Exception as e:
                print(f"  检查Sheet '{sheet_name}' 时出错: {str(e)}")
                continue

        # Pass 3: everything failed — fall back to the first sheet.
        print("⚠️ 所有候选Sheet检查失败,尝试第一个Sheet")
        first_sheet = xls.sheet_names[0]
        try:
            df_preview = pd.read_excel(
                file_path,
                sheet_name=first_sheet,
                header=None,
                nrows=20,
                engine='openpyxl'
            )
            header_row_idx = self.find_header_row(df_preview)
            print(f"✅ 在备份Sheet '{first_sheet}' 中找到标题行")
            return first_sheet
        except Exception as e:
            print(f"❌ 备份Sheet '{first_sheet}' 也失败: {str(e)}")
            return None
||||
    def validate_bom(self, bom_df, file_name, sheet_name):
        """Validate BOM data and collect per-row anomaly records.

        Checks: duplicate Partnumbers, empty Partnumbers, and whether the
        number of Reference designators matches Quantity.

        Returns:
            List of anomaly dicts (Chinese keys, consumed by the report writer).
        """
        errors = []

        # 1. Duplicate Partnumber check (keep=False flags every duplicated row).
        dup_partnumbers = bom_df[bom_df.duplicated('Partnumber', keep=False)]
        if not dup_partnumbers.empty:
            print(f"⚠️ 发现重复的Partnumber: {len(dup_partnumbers)} 行")
            for idx, row in dup_partnumbers.iterrows():
                error = {
                    '文件': file_name,
                    'Sheet': sheet_name,
                    '原始行号': idx + 2,  # Excel rows are 1-based; +2 skips past the header row
                    '异常类型': '重复Partnumber',
                    '异常描述': f"Partnumber '{row['Partnumber']}' 重复出现"
                }
                errors.append(error)

        # 2. Empty Partnumber check (NaN or empty string).
        empty_partnumbers = bom_df[bom_df['Partnumber'].isna() | (bom_df['Partnumber'] == '')]
        if not empty_partnumbers.empty:
            print(f"⚠️ 发现空Partnumber: {len(empty_partnumbers)} 行")
            for idx, row in empty_partnumbers.iterrows():
                error = {
                    '文件': file_name,
                    'Sheet': sheet_name,
                    '原始行号': idx + 2,
                    '异常类型': '空Partnumber',
                    '异常描述': "Partnumber为空"
                }
                errors.append(error)

        # 3. Reference designator count must equal Quantity.
        for idx, row in bom_df.iterrows():
            # NOTE(review): a skip for special rows (e.g. PCB) was here but is
            # commented out in the original, so every row is validated.

            refs = str(row['Reference'])
            qty = row['Quantity']

            try:
                # Count non-empty comma-separated designators.
                ref_count = len([r for r in refs.split(',') if r.strip()])

                # Non-numeric Quantity becomes -1, which never matches a count.
                try:
                    qty_val = int(qty)
                except (ValueError, TypeError):
                    qty_val = -1

                if ref_count != qty_val:
                    error = {
                        '文件': file_name,
                        'Sheet': sheet_name,
                        '原始行号': idx + 2,
                        '异常类型': '数量不一致',
                        '异常描述': f"位号数量({ref_count}) ≠ Quantity({qty})"
                    }
                    errors.append(error)
            except Exception as e:
                # Unexpected failure while validating this row: record it too.
                error = {
                    '文件': file_name,
                    'Sheet': sheet_name,
                    '原始行号': idx + 2,
                    '异常类型': '验证错误',
                    '异常描述': f"验证异常: {str(e)}"
                }
                errors.append(error)

        return errors
||||
    def load_bom(self, file_path):
        """Load a BOM sheet into a normalized DataFrame.

        Finds the active sheet and header row, standardizes column names via
        an alias map, drops blank rows, validates the data, and updates the
        old/new row statistics (first call fills old_bom_rows, second fills
        new_bom_rows).

        Returns:
            Tuple of (dataframe, active_sheet_name).

        Raises:
            ValueError: when required canonical columns are missing.
        """
        print(f"识别激活Sheet...")
        active_sheet = self.find_active_sheet(file_path)
        print(f"📊 使用Sheet: {active_sheet}")

        # Re-read a preview to locate the header row in the chosen sheet.
        df_preview = pd.read_excel(
            file_path,
            sheet_name=active_sheet,
            header=None,
            nrows=20
        )

        header_row_idx = self.find_header_row(df_preview)

        print("加载完整BOM数据...")
        # dtype=str keeps part numbers and quantities as text, avoiding
        # numeric coercion surprises.
        bom_df = pd.read_excel(
            file_path,
            sheet_name=active_sheet,
            header=header_row_idx,
            dtype=str
        )

        # First load populates the old-BOM count, second load the new-BOM count.
        if "old_bom_rows" not in self.stats or self.stats['old_bom_rows'] == 0:
            self.stats['old_bom_rows'] = len(bom_df)
        else:
            self.stats['new_bom_rows'] = len(bom_df)

        # Tidy header text.
        bom_df.columns = [str(col).strip() for col in bom_df.columns]
        print(f"  原始列名: {list(bom_df.columns)}")

        # Alias map from raw header variants to canonical column names.
        column_aliases = {
            'Item': 'Item',
            'Partnumber': 'Partnumber',
            'Part Number': 'Partnumber',
            'Purchase_Code': 'Purchase_Code',
            'MF_PN': 'MF_PN',
            'Description': 'Description',
            'Part Type': 'Part Type',
            'MF_NAME': 'MF_NAME',
            'Manufacturer': 'MF_NAME',
            'PCB_Footprint': 'PCB_Footprint',
            'Reference': 'Reference',
            'References': 'Reference',
            'Quantity': 'Quantity',
            'Qty': 'Quantity',
            '加工方式': '焊接方式',
            '焊接方式': '焊接方式',
            'Value': 'Value',
            '备注': '备注',
            'Remark': '备注',
            'Comments': '备注'
        }

        # Apply only the aliases whose raw name actually appears.
        bom_df = bom_df.rename(columns={col: alias for col, alias in column_aliases.items()
                                        if col in bom_df.columns})
        print(f"  标准化后列名: {list(bom_df.columns)}")

        # All canonical columns must be present after renaming.
        missing_cols = [col for col in self.required_columns if col not in bom_df.columns]
        if missing_cols:
            raise ValueError(f"❌ 缺少必需列: {', '.join(missing_cols)}")

        # Drop rows whose Item cell is blank.
        # NOTE(review): this raises KeyError when no 'Item' column survives
        # renaming ('Item' is not in required_columns) — confirm all inputs
        # carry an Item/ITEM header.
        initial_count = len(bom_df)
        bom_df = bom_df.replace('', np.nan)
        bom_df = bom_df.dropna(subset=['Item'], how='all')
        cleaned_count = len(bom_df)

        if initial_count > cleaned_count:
            print(
                f"  清理空行: 移除 {initial_count - cleaned_count} 行 (原 {initial_count} 行 -> 现 {cleaned_count} 行)")

        # Run validation and accumulate any anomalies into instance state.
        file_name = os.path.basename(file_path)
        errors = self.validate_bom(bom_df, file_name, active_sheet)
        self.validation_errors.extend(errors)
        self.stats['total_errors'] += len(errors)

        if errors:
            print(f"⚠️ 在 '{file_name}' 中发现 {len(errors)} 个数据异常")

        return bom_df, active_sheet
||||
def compare_reference_lists(self, old_refs_str, new_refs_str):
    """Diff two comma-separated reference-designator lists.

    Returns an empty string when both lists contain the same designators,
    otherwise a human-readable summary of added/removed designators.
    """
    def _to_set(raw):
        # A missing cell (NaN) is treated as an empty designator list.
        if pd.isna(raw):
            raw = ""
        return {part.strip() for part in str(raw).split(',') if part.strip()}

    old_set = _to_set(old_refs_str)
    new_set = _to_set(new_refs_str)

    # Identical designator sets -> no difference to report.
    if old_set == new_set:
        return ""

    messages = []
    gained = new_set - old_set
    lost = old_set - new_set
    if gained:
        messages.append(f"增加位号: {','.join(sorted(gained))}")
    if lost:
        messages.append(f"删除位号: {','.join(sorted(lost))}")

    return "; ".join(messages)
|
||||
|
||||
def compare_boms(self, old_bom, new_bom):
    """Diff two BOM DataFrames keyed by (stripped) Partnumber.

    Builds one side-by-side record per added ("新增"), removed ("删除"),
    or changed ("变更") material.  Columns are laid out as the old-BOM
    column group, a 'Design change Type' column, then the new-BOM column
    group, with the change description stored in 'NEW_Remark'.

    Returns:
        (change_df, right_start_col): the change-record DataFrame and the
        1-based column index where the new-BOM column group starts.
    """
    print("开始比较两份BOM...")
    # Normalize the join key on both sides.
    old_bom['Partnumber'] = old_bom['Partnumber'].astype(str).str.strip()
    new_bom['Partnumber'] = new_bom['Partnumber'].astype(str).str.strip()

    changes = []

    old_partnumbers = set(old_bom['Partnumber'].unique())
    if len(old_partnumbers) != len(old_bom):
        print(f"⚠️ 旧BOM有重复的Partnumber: 总行数{len(old_bom)},唯一物料数{len(old_partnumbers)}")
    new_partnumbers = set(new_bom['Partnumber'].unique())
    if len(new_partnumbers) != len(new_bom):
        print(f"⚠️ 新BOM有重复的Partnumber: 总行数{len(new_bom)},唯一物料数{len(new_partnumbers)}")

    all_partnumbers = sorted(old_partnumbers | new_partnumbers)
    print(f" 总物料项数量: {len(all_partnumbers)} (旧BOM: {len(old_partnumbers)}, 新BOM: {len(new_partnumbers)})")

    for idx, pn in enumerate(all_partnumbers):
        # Progress feedback every 100 items and on the final item.
        if (idx + 1) % 100 == 0 or (idx + 1) == len(all_partnumbers):
            print(f" 处理进度: {idx + 1}/{len(all_partnumbers)} 项物料")

        record = {'ITEM_OLD': '', 'ITEM_NEW': ''}
        old_row = None
        new_row = None
        change_desc = ""

        # On duplicate Partnumbers only the first matching row is compared.
        old_match = old_bom[old_bom['Partnumber'] == pn]
        if not old_match.empty:
            old_row = old_match.iloc[0]
            record['ITEM_OLD'] = old_row['Item']

        new_match = new_bom[new_bom['Partnumber'] == pn]
        if not new_match.empty:
            new_row = new_match.iloc[0]
            record['ITEM_NEW'] = new_row['Item']

        if old_row is None:
            change_type = "新增"
            self.stats['added_items'] += 1
            change_desc = "新增物料"
        elif new_row is None:
            change_type = "删除"
            self.stats['removed_items'] += 1
            change_desc = "删除物料"
        else:
            # Tentative; confirmed (and counted) only when an actual field
            # difference is found below.
            change_type = "变更"

        # Left-hand columns: old-BOM values.
        for change_col, bom_col in self.column_mapping.items():
            if change_col == 'ITEM':
                continue
            record[change_col] = old_row[bom_col] if old_row is not None and bom_col in old_row else ''

        # Right-hand columns: new-BOM values.
        for change_col, bom_col in self.column_mapping.items():
            if change_col == 'ITEM':
                continue
            record[f'NEW_{change_col}'] = new_row[bom_col] if new_row is not None and bom_col in new_row else ''

        if change_type == "变更":
            change_details = []

            if 'Quantity' in old_row.index and 'Quantity' in new_row.index:
                old_qty = str(old_row['Quantity'])
                new_qty = str(new_row['Quantity'])
                if old_qty != new_qty:
                    change_details.append(f"Qty: {old_qty}→{new_qty}")

            if 'MF_PN' in old_row.index and 'MF_PN' in new_row.index:
                old_mfpn = str(old_row['MF_PN'])
                new_mfpn = str(new_row['MF_PN'])
                if old_mfpn != new_mfpn:
                    change_details.append(f"MF PN: {old_mfpn}→{new_mfpn}")

            # Reference lists are compared as designator sets.
            if 'Reference' in old_row.index and 'Reference' in new_row.index:
                ref_diff = self.compare_reference_lists(old_row['Reference'], new_row['Reference'])
                if ref_diff:
                    change_details.append(ref_diff)

            # Remaining mapped columns, except those handled above or ignored.
            for change_col, bom_col in self.column_mapping.items():
                if (change_col == 'ITEM' or
                        bom_col in ['Quantity', 'MF_PN', 'Reference'] or
                        bom_col in self.ignore_columns):
                    continue

                old_val = old_row[bom_col] if bom_col in old_row else ''
                new_val = new_row[bom_col] if bom_col in new_row else ''

                if str(old_val) != str(new_val):
                    change_details.append(f"{change_col}: {old_val}→{new_val}")

            if change_details:
                change_desc = "; ".join(change_details)
                # Bug fix: count a changed item only when a real difference
                # exists.  Previously the counter was incremented before the
                # comparison, so identical rows inflated 'changed_items'.
                self.stats['changed_items'] += 1
            else:
                # Present in both BOMs with no differences: not a change.
                change_type = ""

        record['Design change Type'] = change_type
        record['NEW_Remark'] = change_desc

        if change_type:
            changes.append(record)

    # Column layout: old group | change type | new group (Remark last).
    left_columns = ['ITEM_OLD'] + [col for col in self.change_columns if col != 'ITEM']
    middle_columns = ['Design change Type']
    right_columns = ['ITEM_NEW'] + [f'NEW_{col}' for col in self.change_columns if col != 'ITEM']

    if 'NEW_Remark' in right_columns:
        right_columns.remove('NEW_Remark')
        right_columns.append('NEW_Remark')

    change_columns = left_columns + middle_columns + right_columns
    right_start_col = len(left_columns) + len(middle_columns) + 1

    return pd.DataFrame(changes, columns=change_columns), right_start_col
|
||||
|
||||
def generate_summary(self):
    """Build the multi-line processing-summary text from self.stats."""
    stats = self.stats
    total_changes = stats['changed_items'] + stats['added_items'] + stats['removed_items']
    # Label/value pairs in display order.
    rows = [
        ("原始BOM行数", stats['old_bom_rows']),
        ("新BOM行数", stats['new_bom_rows']),
        ("变更物料数量", stats['changed_items']),
        ("新增物料数量", stats['added_items']),
        ("删除物料数量", stats['removed_items']),
        ("变更记录总数", total_changes),
        ("数据异常总数", stats['total_errors']),
    ]
    parts = [
        "\n" + "=" * 50,
        "BOM 比较处理汇总",
        "-" * 50,
    ]
    parts.extend(f"{label}: {value}" for label, value in rows)
    parts.append("=" * 50)
    return "\n".join(parts)
|
||||
|
||||
def generate_change_record(self):
    """Interactive end-to-end workflow: pick two BOM workbooks via file
    dialogs, diff them with compare_boms, and write an Excel change-record
    workbook (plus a data-anomaly sheet when validation errors were found)
    into the directory of the new BOM file.
    """
    # Hidden Tk root so file dialogs can be shown without a main window.
    root = tk.Tk()
    root.withdraw()

    # Reset statistics and validation-error records for this run.
    self.stats = {
        'old_bom_rows': 0,
        'new_bom_rows': 0,
        'changed_items': 0,
        'added_items': 0,
        'removed_items': 0,
        'total_errors': 0
    }
    self.validation_errors = []

    try:
        # Step 1: choose the original (old) BOM file.
        print("\n" + "=" * 50)
        print("步骤 1/4: 选择原始BOM文件")
        print("=" * 50)
        old_file = filedialog.askopenfilename(
            title="选择原始BOM文件",
            filetypes=[("Excel Files", "*.xlsx *.xls")]
        )
        if not old_file:
            print("❌ 未选择文件,操作取消")
            return
        print(f"📂 已选择原始BOM: {old_file}")
        old_file_name = os.path.basename(old_file)
        # output_dir = os.path.dirname(old_file)

        # Step 2: choose the changed (new) BOM file.
        print("\n" + "=" * 50)
        print("步骤 2/4: 选择变更后BOM文件")
        print("=" * 50)
        new_file = filedialog.askopenfilename(
            title="选择变更后BOM文件",
            filetypes=[("Excel Files", "*.xlsx *.xls")]
        )
        if not new_file:
            print("❌ 未选择文件,操作取消")
            return
        print(f"📂 已选择新BOM: {new_file}")
        new_file_name = os.path.basename(new_file)
        # The report is written next to the NEW BOM file.
        output_dir = os.path.dirname(new_file)

        # Step 3: load and preprocess both BOM files.
        print("\n" + "=" * 50)
        print("步骤 3/4: 加载并处理BOM文件")
        print("=" * 50)
        print(f"🔍 加载原始BOM文件: {old_file_name}")
        old_bom, old_bom_activesheetname = self.load_bom(old_file)
        print(f"✅ 原始BOM加载完成,共 {len(old_bom)} 行")

        print(f"\n🔍 加载变更后BOM文件: {new_file_name}")
        new_bom, new_bom_activesheetname = self.load_bom(new_file)
        print(f"✅ 新BOM加载完成,共 {len(new_bom)} 行")

        # Step 4: diff the BOMs and build the change record.
        print("\n" + "=" * 50)
        print("步骤 4/4: 比较BOM差异并生成变更记录")
        print("=" * 50)
        print("🔍 比较BOM差异...")
        change_df, right_start_col = self.compare_boms(old_bom, new_bom)

        # Timestamped output file name built from the two source sheet names.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"{old_bom_activesheetname} to {new_bom_activesheetname} eBOM_change_record_{timestamp}.xlsx"
        output_path = os.path.join(output_dir, output_file)

        # Save the change record plus any anomaly records.
        print(f"\n💾 保存变更记录文件: {output_path}")
        wb = Workbook()

        # Change-record worksheet.
        ws_change = wb.active
        ws_change.title = "PCBA_BOM_change record"

        if change_df.empty:
            ws_change.cell(row=1, column=1, value="两份BOM完全相同,无变更记录")
            print("✅ 两份BOM完全相同,无变更记录")
        else:
            # Rename the side-by-side columns back to their display names.
            column_rename = {
                'ITEM_OLD': 'ITEM',
                'ITEM_NEW': 'ITEM',
                **{f'NEW_{col}': col for col in self.change_columns if col != 'ITEM'},
                'NEW_Remark': 'Remark'
            }
            change_df = change_df.rename(columns=column_rename)

            # Row 1 carries the two source file names above their column groups.
            ws_change.cell(row=1, column=1, value=old_file_name)
            ws_change.cell(row=1, column=right_start_col, value=new_file_name)

            # Row 2: column headers.
            col_names = change_df.columns.tolist()
            for col_idx, col_name in enumerate(col_names, 1):
                ws_change.cell(row=2, column=col_idx, value=col_name)

            # Rows 3+: change-record data.
            for r_idx, row in enumerate(dataframe_to_rows(change_df, index=False, header=False), 3):
                for c_idx, value in enumerate(row, 1):
                    ws_change.cell(row=r_idx, column=c_idx, value=value)

        # Anomaly worksheet, only when validation found problems.
        if self.validation_errors:
            print(f"⚠️ 发现 {len(self.validation_errors)} 个数据异常,创建异常记录")
            ws_errors = wb.create_sheet(title="BOM异常记录")

            # Anomaly-record column headers.
            error_columns = ['文件', 'Sheet', '原始行号', '异常类型', '异常描述']
            for col_idx, col_name in enumerate(error_columns, 1):
                ws_errors.cell(row=1, column=col_idx, value=col_name)

            # Anomaly data rows.
            for row_idx, error in enumerate(self.validation_errors, 2):
                ws_errors.cell(row=row_idx, column=1, value=error['文件'])
                ws_errors.cell(row=row_idx, column=2, value=error['Sheet'])
                ws_errors.cell(row=row_idx, column=3, value=error['原始行号'])
                ws_errors.cell(row=row_idx, column=4, value=error['异常类型'])
                ws_errors.cell(row=row_idx, column=5, value=error['异常描述'])

        # Persist the workbook.
        wb.save(output_path)

        # Print the processing summary.
        print(self.generate_summary())
        print(f"\n✅ 变更记录已保存至: {output_path}")

    except Exception as e:
        print(f"\n❌ 处理过程中出错: {str(e)}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
|
||||
# Script entry point: print a banner, run the interactive change-record
# workflow, then show a completion message.
if __name__ == "__main__":
    print("=" * 60)
    print(" PCBA BOM 变更记录生成工具 ")
    print("=" * 60)
    print("要求: 标题行必须同时包含 'Item', 'Partnumber', 'MF_PN'")
    comparator = BOMComparator()
    comparator.generate_change_record()
    print("\n" + "=" * 50)
    print(" 处理完成,按任意键退出... ")
    # input()
|
||||
618
BOMCompare/BOMConsolidatorV1.py
Normal file
618
BOMCompare/BOMConsolidatorV1.py
Normal file
@@ -0,0 +1,618 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
from datetime import datetime
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
from collections import defaultdict
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class ProcessedFileInfo:
    """Processing metadata for one successfully parsed BOM source file."""
    filename: str    # base name of the source Excel file
    sheet_name: str  # sheet the BOM data was read from
    start_row: int   # 0-based header row index (reports display it 1-based)
    total_rows: int  # row count before cleaning
    valid_rows: int  # rows that produced a valid BOMRow
|
||||
|
||||
|
||||
@dataclass
class BOMRow:
    """One material line read from a source BOM sheet."""
    partnumber: str
    purchase_code: str
    mf_pn: str
    description: str
    part_type: str
    mf_name: str
    pcb_footprint: str
    quantity: int
    reference: str
    filename: str = ""
    sheet_name: str = ""

    @classmethod
    def from_dataframe_row(cls, row: pd.Series, filename: str = "", sheet_name: str = "") -> Optional['BOMRow']:
        """Build a BOMRow from a DataFrame row.

        Returns None when any value cannot be converted (in practice:
        a non-numeric Quantity).
        """
        text_fields = (
            ('partnumber', 'Partnumber'),
            ('purchase_code', 'Purchase_Code'),
            ('mf_pn', 'MF_PN'),
            ('description', 'Description'),
            ('part_type', 'Part_Type'),
            ('mf_name', 'MF_NAME'),
            ('pcb_footprint', 'PCB_Footprint'),
            ('reference', 'Reference'),
        )
        try:
            kwargs = {attr: str(row.get(col, '')).strip() for attr, col in text_fields}
            kwargs['quantity'] = int(row.get('Quantity', 0))
        except (ValueError, TypeError):
            return None
        return cls(filename=filename, sheet_name=sheet_name, **kwargs)

    def get_key(self) -> str:
        """Unique identity: Partnumber, falling back to MF_PN when empty."""
        return self.partnumber or self.mf_pn

    def is_valid(self) -> bool:
        """A row is usable only when it yields a non-empty key."""
        return bool(self.get_key())
|
||||
|
||||
|
||||
@dataclass
class ConsolidatedMaterial:
    """A material merged across source files, with per-file quantities."""
    partnumber: str
    purchase_code: str
    mf_pn: str
    description: str
    part_type: str
    mf_name: str
    pcb_footprint: str
    quantity_data: Dict[str, int]  # source filename -> quantity in that file
    inconsistencies: List[str]     # human-readable cross-file conflicts

    @property
    def total_quantity(self) -> int:
        """Sum of this material's quantity over every source file."""
        return sum(self.quantity_data.values())

    @property
    def has_inconsistencies(self) -> bool:
        """True when at least one conflict has been recorded."""
        return bool(self.inconsistencies)
|
||||
|
||||
|
||||
class ConsistencyChecker:
    """Validates that a material's descriptive fields agree across files."""

    def __init__(self):
        # Fields whose values must agree for the same material key.
        self.fields_to_check = ['Purchase_Code', 'MF_PN', 'Part_Type', 'MF_NAME', 'PCB_Footprint']

    def check_field_consistency(self, existing: "ConsolidatedMaterial", new_row: "BOMRow") -> List[str]:
        """Return one message per tracked field where new_row disagrees
        with the already-consolidated values."""
        # Report field label -> attribute name on both objects.
        attr_by_field = {
            'Purchase_Code': 'purchase_code',
            'MF_PN': 'mf_pn',
            'Part_Type': 'part_type',
            'MF_NAME': 'mf_name',
            'PCB_Footprint': 'pcb_footprint',
        }

        problems = []
        for field, attr in attr_by_field.items():
            known = getattr(existing, attr)
            candidate = getattr(new_row, attr)

            if self._should_check_field(known, candidate) and known != candidate:
                problems.append(
                    f"{field}不一致: {known} ≠ {candidate} (文件: {new_row.filename}, Sheet: {new_row.sheet_name})"
                )

        return problems

    def check_quantity_reference(self, row: "BOMRow") -> Optional[str]:
        """Verify the number of comma-separated designators matches Quantity.

        Returns a message on mismatch, otherwise None.
        """
        if not row.reference:
            return None

        designators = [item for item in row.reference.split(',') if item.strip()]
        ref_count = len(designators)

        if ref_count != row.quantity:
            return f"Reference数量不符: {ref_count}个位置 ≠ Quantity={row.quantity} (文件: {row.filename}, Sheet: {row.sheet_name})"

        return None

    def _should_check_field(self, existing_val: str, new_val: str) -> bool:
        """Skip comparison when the incoming value is empty or a null placeholder."""
        return bool(new_val) and new_val.lower() not in ['', 'nan', 'none', 'null']
|
||||
|
||||
|
||||
class BOMFileParser:
    """Locates and parses the BOM data sheet inside an Excel workbook."""

    def __init__(self):
        # Header tokens that must all appear on a row for a sheet to qualify.
        self.required_headers = ['Item', 'Partnumber', 'Purchase_Code', 'MF_PN']
        # Columns (after name normalization) required to build BOMRow objects.
        self.required_columns = ['Partnumber', 'Purchase_Code', 'MF_PN', 'Description',
                                 'Part_Type', 'MF_NAME', 'PCB_Footprint', 'Quantity', 'Reference']

    def find_valid_sheet(self, file_path: str) -> Tuple[Optional[str], Optional[int]]:
        """Scan each sheet for a header row containing all required headers.

        Returns (sheet_name, header_row_index) on success, or (None, None)
        when no sheet qualifies or the file cannot be read.  (Annotation
        fixed: the previous Optional[Tuple[str, int]] did not match the
        (None, None) fall-through return.)
        """
        try:
            xl = pd.ExcelFile(file_path)

            for sheet_name in xl.sheet_names:
                df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)

                # Only the first 10 rows are scanned for the header.
                for i in range(min(len(df), 10)):
                    headers = df.iloc[i].values
                    # NOTE: substring match on the stringified row — tolerant
                    # of extra decoration but deliberately loose.
                    if all(col in str(headers) for col in self.required_headers):
                        filename = os.path.basename(file_path)
                        # Fixed: interpolate the actual file name (the
                        # variable was computed but a placeholder was printed).
                        print(f"文件{filename}找到有效sheet {sheet_name}|有效数据行从 {i} 开始。")
                        return sheet_name, i
        except Exception as e:
            print(f"读取文件 {file_path} 时出错: {e}")

        return None, None

    def parse_file(self, file_path: str) -> Optional[Tuple[List["BOMRow"], "ProcessedFileInfo"]]:
        """Parse one workbook into BOMRow objects plus processing metadata.

        Returns None when no valid sheet is found, required columns are
        missing after cleaning, or reading fails.
        """
        filename = os.path.basename(file_path)
        sheet_name, header_row = self.find_valid_sheet(file_path)

        if not sheet_name:
            return None

        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
            total_rows = len(df)
            df = self._clean_dataframe(df)

            if not self._validate_columns(df):
                return None

            bom_rows = []
            valid_rows = 0
            for _, row_data in df.iterrows():
                bom_row = BOMRow.from_dataframe_row(row_data, filename, sheet_name)
                if bom_row and bom_row.is_valid():
                    bom_rows.append(bom_row)
                    valid_rows += 1

            # Processing metadata used by the summary report.
            file_info = ProcessedFileInfo(
                filename=filename,
                sheet_name=sheet_name,
                start_row=header_row,
                total_rows=total_rows,
                valid_rows=valid_rows
            )

            return bom_rows, file_info

        except Exception as e:
            print(f"解析文件 {file_path} 时出错: {e}")
            return None

    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalize column names (whitespace -> '_', strip punctuation)
        and drop fully empty rows."""
        df.columns = df.columns.str.strip().str.replace(r'\s+', '_', regex=True)
        df.columns = df.columns.str.replace(r'[^a-zA-Z0-9_]', '', regex=True)

        df = df.dropna(how='all')

        return df

    def _validate_columns(self, df: pd.DataFrame) -> bool:
        """True when every required column is present after normalization."""
        missing_cols = [col for col in self.required_columns if col not in df.columns]
        return len(missing_cols) == 0
|
||||
|
||||
|
||||
class MaterialConsolidator:
    """Accumulates BOMRow records into one ConsolidatedMaterial per key."""

    def __init__(self):
        # material key (Partnumber or MF_PN) -> merged material record
        self.materials: Dict[str, "ConsolidatedMaterial"] = {}
        self.consistency_checker = ConsistencyChecker()
        # source filename -> {material key -> quantity}
        self.file_quantities: Dict[str, Dict[str, int]] = defaultdict(dict)
        self.processed_files_info: List["ProcessedFileInfo"] = []

    def add_bom_row(self, bom_row: "BOMRow") -> None:
        """Merge one BOM row, recording its quantity and any field conflicts."""
        key = bom_row.get_key()

        material = self.materials.get(key)
        if material is None:
            # First sighting of this material: seed the merged record
            # from this row's descriptive fields.
            material = ConsolidatedMaterial(
                partnumber=bom_row.partnumber,
                purchase_code=bom_row.purchase_code,
                mf_pn=bom_row.mf_pn,
                description=bom_row.description,
                part_type=bom_row.part_type,
                mf_name=bom_row.mf_name,
                pcb_footprint=bom_row.pcb_footprint,
                quantity_data={},
                inconsistencies=[]
            )
            self.materials[key] = material

        # Field-level conflicts against previously consolidated values.
        material.inconsistencies.extend(
            self.consistency_checker.check_field_consistency(material, bom_row)
        )

        # Reference-count vs Quantity mismatch for this specific row.
        ref_issue = self.consistency_checker.check_quantity_reference(bom_row)
        if ref_issue:
            material.inconsistencies.append(ref_issue)

        # NOTE: a later row from the same file overwrites (does not add to)
        # that file's recorded quantity for this material.
        material.quantity_data[bom_row.filename] = bom_row.quantity
        self.file_quantities[bom_row.filename][key] = bom_row.quantity

    def add_file_info(self, file_info: "ProcessedFileInfo") -> None:
        """Record processing metadata for one parsed source file."""
        self.processed_files_info.append(file_info)

    def get_statistics(self) -> Dict[str, Any]:
        """Summary counters consumed by the report generator."""
        issue_counts = [len(mat.inconsistencies) for mat in self.materials.values()]

        return {
            'total_materials': len(self.materials),
            'total_inconsistencies': sum(issue_counts),
            'materials_with_issues': sum(1 for count in issue_counts if count),
            'file_count': len(self.file_quantities),
            'processed_files_info': self.processed_files_info
        }
|
||||
|
||||
|
||||
class ReportGenerator:
    """Writes the consolidated multi-sheet Excel report."""

    def __init__(self, output_folder: str):
        self.output_folder = output_folder
        self._ensure_output_directory()

    def _ensure_output_directory(self):
        """Create the BOM_Merge_out subfolder if it does not already exist."""
        output_dir = os.path.join(self.output_folder, "BOM_Merge_out")
        os.makedirs(output_dir, exist_ok=True)

    def _create_summary_sheet(self, stats: Dict[str, Any]) -> pd.DataFrame:
        """Build the '汇总信息' (summary) sheet as a two-column DataFrame."""
        summary_data = [
            ["BOM合并检查汇总报告", ""],
            ["生成时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
            ["", ""],
            ["处理统计", ""],
            ["扫描文件总数", stats['total_files']],
            ["成功处理文件数", stats['processed_files']],
            ["处理数据行数", stats['processed_rows']],
            ["", ""],
            ["物料统计", ""],
            ["合并物料种类数", stats['total_materials']],
            ["存在问题的物料数", stats['materials_with_issues']],
            ["不一致问题总数", stats['total_inconsistencies']],
            ["", ""],
            ["数据源文件信息", ""],
            ["有效文件总数", len(stats.get('processed_files_info', []))],
            ["", ""]
        ]

        # One detail group per processed source file.
        files_info = stats.get('processed_files_info', [])
        for i, file_info in enumerate(files_info, 1):
            summary_data.extend([
                [f"数据源文件 {i}", file_info.filename],
                [" Sheet名称", file_info.sheet_name],
                [" 起始行", file_info.start_row + 1],  # displayed 1-based
                [" 总行数", file_info.total_rows],
                [" 有效行数", file_info.valid_rows],
                ["", ""]
            ])

        summary_data.extend([
            ["", ""],
            ["文件信息", ""],
            ["输出文件夹", os.path.join(self.output_folder, "BOM_Merge_out")],
            ["报告文件", stats.get('output_filename', '')],
            ["合并Sheet名称", "BOM_Merge"]
        ])

        return pd.DataFrame(summary_data, columns=["项目", "数值"])

    def _create_data_source_sheet(self, stats: Dict[str, Any]) -> pd.DataFrame:
        """Build the '数据源文件' sheet listing every processed source file."""
        files_info = stats.get('processed_files_info', [])

        if not files_info:
            return pd.DataFrame([["无有效数据源文件", ""]], columns=["状态", "说明"])

        data_source_data = []
        for i, file_info in enumerate(files_info, 1):
            data_source_data.append({
                '序号': i,
                '文件名': file_info.filename,
                'Sheet名称': file_info.sheet_name,
                '数据起始行': file_info.start_row + 1,  # displayed 1-based
                '总行数': file_info.total_rows,
                '有效行数': file_info.valid_rows,
                '处理状态': '成功'
            })

        return pd.DataFrame(data_source_data)

    def _create_merge_sheet(self, consolidator: "MaterialConsolidator") -> pd.DataFrame:
        """Build the 'BOM_Merge' sheet: one row per consolidated material
        with per-file quantity columns and a totals column."""
        report_data = []
        file_columns = sorted(consolidator.file_quantities.keys())

        for material in consolidator.materials.values():
            row = {
                'Partnumber': material.partnumber,
                'Purchase_Code': material.purchase_code,
                'MF_PN': material.mf_pn,
                'Description': material.description,
                'Part Type': material.part_type,
                'MF_NAME': material.mf_name,
                'PCB_Footprint': material.pcb_footprint,
                '检查信息': '; '.join(material.inconsistencies) if material.inconsistencies else '一致'
            }

            # Per-file quantity columns plus the total.
            for file in file_columns:
                row[file] = material.quantity_data.get(file, 0)
            row['合计'] = material.total_quantity

            report_data.append(row)

        return pd.DataFrame(report_data)

    def _autofit_columns(self, sheet, max_width: int) -> None:
        """Widen each column of an openpyxl sheet to fit its longest value,
        capped at max_width.  (Extracted: this loop was duplicated verbatim
        for two sheets with different caps.)"""
        for col in sheet.columns:
            column_letter = col[0].column_letter
            max_length = 0
            for cell in col:
                try:
                    max_length = max(max_length, len(str(cell.value)))
                except Exception:
                    # A cell whose value cannot be stringified keeps the
                    # running width.
                    pass
            sheet.column_dimensions[column_letter].width = min(max_length + 2, max_width)

    def generate_consolidated_report(self, consolidator: "MaterialConsolidator", stats: Dict[str, Any]) -> Optional[str]:
        """Write the three-sheet report; returns the output path or None."""
        if not consolidator.materials:
            return None

        # Timestamped output file name.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"BOM合并报告_{timestamp}.xlsx"
        output_path = os.path.join(self.output_folder, "BOM_Merge_out", output_filename)

        try:
            # One Excel file with three sheets.
            with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
                # Sheet 1: summary.
                summary_df = self._create_summary_sheet(stats)
                summary_df.to_excel(writer, sheet_name='汇总信息', index=False)

                # Sheet 2: data-source file details.
                data_source_df = self._create_data_source_sheet(stats)
                data_source_df.to_excel(writer, sheet_name='数据源文件', index=False)

                # Sheet 3: merged material data.
                merge_df = self._create_merge_sheet(consolidator)
                merge_df.to_excel(writer, sheet_name='BOM_Merge', index=False)

                # Column-width adjustments.
                workbook = writer.book

                # Summary sheet uses fixed widths.
                summary_sheet = workbook['汇总信息']
                summary_sheet.column_dimensions['A'].width = 25
                summary_sheet.column_dimensions['B'].width = 40

                # Auto-fit the other two sheets (different caps).
                self._autofit_columns(workbook['数据源文件'], 30)
                self._autofit_columns(workbook['BOM_Merge'], 50)

            # Record the generated file name for the caller's summary.
            stats['output_filename'] = output_filename

            return output_path

        except Exception as e:
            print(f"保存报告失败: {e}")
            return None
|
||||
|
||||
|
||||
class BOMProcessor:
    """Top-level controller: parse every BOM file in a folder and emit the
    merged report."""

    def __init__(self):
        self.file_parser = BOMFileParser()
        self.material_consolidator = MaterialConsolidator()
        self.report_generator: Optional["ReportGenerator"] = None

        # Processing statistics.
        self.processed_files = 0
        self.processed_rows = 0
        self.total_files = 0

    def set_output_folder(self, folder_path: str):
        """Create the report generator rooted at folder_path."""
        self.report_generator = ReportGenerator(folder_path)

    def process_folder(self, folder_path: str) -> bool:
        """Parse every .xlsx in folder_path.

        Returns True when at least one file was processed successfully.
        """
        bom_files = glob.glob(os.path.join(folder_path, "*.xlsx"))
        self.total_files = len(bom_files)

        if not bom_files:
            return False

        successful_files = 0
        for file_path in bom_files:
            if self._process_single_file(file_path):
                successful_files += 1

        self.processed_files = successful_files
        return successful_files > 0

    def _process_single_file(self, file_path: str) -> bool:
        """Parse one workbook and feed its rows into the consolidator."""
        filename = os.path.basename(file_path)
        # Fixed: interpolate the real file name — the variable was computed
        # but a literal "(unknown)" placeholder was printed instead.
        print(f"处理文件: {filename}...")

        result = self.file_parser.parse_file(file_path)
        if not result:
            print(f" ! 无法解析文件: {filename}")
            return False

        bom_rows, file_info = result

        print(f" √ 文件{filename}找到 {len(bom_rows)} 行有效数据 (Sheet: {file_info.sheet_name})")

        # Record per-file metadata for the summary report.
        self.material_consolidator.add_file_info(file_info)

        # Merge every valid row into the consolidated material table.
        for bom_row in bom_rows:
            self.material_consolidator.add_bom_row(bom_row)
            self.processed_rows += 1

        return True

    def generate_report(self) -> Optional[Dict[str, Any]]:
        """Write the report and return its statistics.

        Returns None when no output folder was configured or writing failed.
        """
        if not self.report_generator:
            return None

        # Combine consolidator counters with file-level counters.
        base_stats = self.material_consolidator.get_statistics()
        base_stats.update({
            'processed_files': self.processed_files,
            'total_files': self.total_files,
            'processed_rows': self.processed_rows
        })

        output_path = self.report_generator.generate_consolidated_report(
            self.material_consolidator, base_stats
        )

        if not output_path:
            return None

        base_stats['output_path'] = output_path
        return base_stats
|
||||
|
||||
|
||||
class UserInterface:
    """Console/Tk helpers for folder selection and result display."""

    @staticmethod
    def select_folder(title: str = "选择文件夹") -> str:
        """Show a folder-picker dialog; returns the chosen path, or an
        empty string when the dialog is cancelled."""
        root = tk.Tk()
        root.withdraw()
        folder_path = filedialog.askdirectory(title=title)
        root.destroy()
        return folder_path

    @staticmethod
    def print_summary(stats: Dict[str, Any], folder_path: str):
        """Print the end-of-run summary: counters, per-file details, and
        output locations."""
        print("\n" + "=" * 60)
        print("BOM合并检查完成!")
        print("=" * 60)
        print(f"处理文件夹: {folder_path}")
        print(f"扫描文件数: {stats['total_files']}")
        print(f"成功处理文件数: {stats['processed_files']}")
        print(f"处理数据行数: {stats['processed_rows']}")
        print(f"合并物料种类数: {stats['total_materials']}")
        print(f"存在问题的物料数: {stats['materials_with_issues']}")
        print(f"不一致问题总数: {stats['total_inconsistencies']}")

        # Per-source-file details.
        files_info = stats.get('processed_files_info', [])
        print(f"有效数据源文件数: {len(files_info)}")
        for file_info in files_info:
            print(f" - {file_info.filename} (Sheet: {file_info.sheet_name}, 有效行: {file_info.valid_rows})")

        print(f"报告文件: {stats['output_path']}")
        print("=" * 60)

        # Remind the user where the output folder is.
        output_dir = os.path.join(folder_path, "BOM_Merge_out")
        print(f"输出保存在: {output_dir}")

        print("\n报告包含三个Sheet:")
        print("1. '汇总信息' - 处理统计和汇总信息")
        print("2. '数据源文件' - 有效数据源文件详细信息")
        print("3. 'BOM_Merge' - 合并后的物料数据")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: pick a folder, consolidate its BOM files,
    and print/write the merge report."""
    # Build the processing pipeline.
    bom_processor = BOMProcessor()

    # Ask the user for the folder holding the BOM workbooks.
    folder_path = UserInterface.select_folder("选择包含BOM文件的文件夹")
    if not folder_path:
        print("未选择文件夹,程序退出")
        return

    bom_processor.set_output_folder(folder_path)

    # Parse every workbook in the folder.
    print(f"开始处理文件夹: {folder_path}")
    success = bom_processor.process_folder(folder_path)

    if not success:
        print("没有找到可处理的BOM文件")
        return

    # Write the consolidated report and show the summary.
    print("\n生成合并报告...")
    stats = bom_processor.generate_report()

    if stats:
        UserInterface.print_summary(stats, folder_path)
    else:
        print("生成报告失败")
|
||||
|
||||
|
||||
# Script entry point; the final input() keeps the console window open.
if __name__ == "__main__":
    main()
    input("\n按任意键退出...")
|
||||
14
BOMCompare/README.md
Normal file
14
BOMCompare/README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
# Sample GitLab Project
|
||||
|
||||
This sample project shows how a project in GitLab looks for demonstration purposes. It contains issues, merge requests and Markdown files in many branches,
|
||||
named and filled with lorem ipsum.
|
||||
|
||||
You can look around to get an idea how to structure your project and, when done, you can safely delete this project.
|
||||
|
||||
[Learn more about creating GitLab projects.](https://docs.gitlab.com/ee/gitlab-basics/create-project.html)
|
||||
|
||||
# 基于标准格式的 BOM文件,输出 BOM差异信息文件
|
||||
BOMCompareForJP.py
|
||||
|
||||
# 基于标准格式的 BOM文件,输出 BOM的合并后的文件,方便校对和物料备料情况的分析。
|
||||
BOMConsolidator.py
|
||||
19
FFT_IMU/.gitignore
vendored
Normal file
19
FFT_IMU/.gitignore
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
/dataProcess_out*
|
||||
*.xls
|
||||
*.xlsx
|
||||
*.csv
|
||||
*.spec
|
||||
|
||||
/src
|
||||
|
||||
/temp
|
||||
|
||||
FFT_IMU_dc_html_v2.py
|
||||
|
||||
FFT_IMU_dc_v2.py
|
||||
739
FFT_IMU/FFT_IMU_dc_html_v1.py
Normal file
739
FFT_IMU/FFT_IMU_dc_html_v1.py
Normal file
@@ -0,0 +1,739 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import signal
|
||||
import os
|
||||
import glob
|
||||
from datetime import datetime
|
||||
import time
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from matplotlib.colors import Normalize
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
import re
|
||||
from colorama import Fore, Style, init
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
import warnings
|
||||
import threading
|
||||
|
||||
# Initialize colorama so ANSI colours render on Windows consoles;
# autoreset restores the default style after every print call.
init(autoreset=True)

# Silence known-noisy matplotlib warnings.
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
warnings.filterwarnings("ignore", category=FutureWarning, module="matplotlib")

# Locks that keep file operations and log output thread-safe.
file_lock = threading.Lock()
log_lock = threading.Lock()
||||
|
||||
|
||||
class IMUDataAnalyzer:
|
||||
    def __init__(self, file_path):
        """Set up the analyzer for one CSV file.

        Infers the data type and default sampling rate from the file name,
        creates a timestamped output directory next to the input file, and
        configures matplotlib fonts / layout options (note: rcParams are
        process-global side effects).

        Args:
            file_path: Path to the CSV file to analyze.
        """
        self.file_path = file_path
        self.data = None              # pandas DataFrame, filled by load_data()
        self.sampling_rate = None     # Hz, set in load_data()
        self.fig_size = (15, 10)
        self.spectrogram_params = {}  # spectrogram parameters, recorded for the HTML report

        # Infer data type and default sampling rate from the file name.
        file_name = os.path.basename(file_path).lower()
        if 'calib' in file_name:
            self.data_type = 'calib'
            self.default_sampling_rate = 5
        elif 'raw' in file_name:
            self.data_type = 'raw'
            self.default_sampling_rate = 1000
        else:
            self.data_type = 'unknown'
            self.default_sampling_rate = 5

        # Resolve the input directory and base name.
        file_dir = os.path.dirname(os.path.abspath(file_path))
        file_base_name = os.path.splitext(os.path.basename(file_path))[0]
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Output directory: "<base name>_output_<timestamp>", beside the input.
        self.output_dir = os.path.join(file_dir, f"{file_base_name}_output_{self.timestamp}")

        # Lock guards directory creation when several workers start at once.
        with file_lock:
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)
                self.log_progress(f"创建输出目录:{self.output_dir}", "INFO")

        # Font setup (CJK-capable fonts first so Chinese labels render).
        plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'Arial']
        plt.rcParams['axes.unicode_minus'] = False

        # Compatibility options to avoid layout-engine conflicts.
        plt.rcParams['figure.constrained_layout.use'] = False
        plt.rcParams['figure.constrained_layout.h_pad'] = 0.02
        plt.rcParams['figure.constrained_layout.w_pad'] = 0.02
        plt.rcParams['figure.constrained_layout.hspace'] = 0.02
        plt.rcParams['figure.constrained_layout.wspace'] = 0.02

        self.log_progress(f"处理文件:{self.file_path}", "INFO")
        self.log_progress(f"数据类型:{self.data_type}", "INFO")
        self.log_progress(f"输出路径:{self.output_dir}", "INFO")
||||
def log_progress(self, message, level="INFO"):
|
||||
"""带颜色和级别的日志输出(线程安全)"""
|
||||
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
with log_lock:
|
||||
if level == "INFO":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.GREEN}{message}")
|
||||
elif level == "WARNING":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.YELLOW}警告: {message}")
|
||||
elif level == "ERROR":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.RED}错误: {message}")
|
||||
elif level == "SUCCESS":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.GREEN}✓ {message}")
|
||||
else:
|
||||
print(f"{Fore.CYAN}[{timestamp}] {message}")
|
||||
|
||||
def check_imu_columns_in_file(self):
|
||||
"""检查文件是否包含IMU数据列(通过读取文件头)"""
|
||||
try:
|
||||
# 只读取第一行来检查列名
|
||||
with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
first_line = f.readline().strip()
|
||||
|
||||
# 检查第一行是否包含imu关键词(不区分大小写)
|
||||
if re.search(r'imu', first_line, re.IGNORECASE):
|
||||
return True
|
||||
else:
|
||||
self.log_progress(f"文件头部不包含'imu'关键词,跳过处理,first_line {first_line}", "WARNING")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
self.log_progress(f"检查文件头部时出错: {str(e)}", "ERROR")
|
||||
return False
|
||||
|
||||
def detect_imu_columns(self):
|
||||
"""自动检测IMU数据列"""
|
||||
all_columns = self.data.columns.tolist()
|
||||
|
||||
# 查找imu前缀(如imu1, imu2等)
|
||||
imu_prefixes = set()
|
||||
for col in all_columns:
|
||||
match = re.match(r'^(imu\d+)_', col.lower())
|
||||
if match:
|
||||
imu_prefixes.add(match.group(1))
|
||||
|
||||
if not imu_prefixes:
|
||||
self.log_progress("未检测到IMU数据列,尝试使用默认列名", "WARNING")
|
||||
# 尝试使用常见列名
|
||||
self.acc_columns = ['imu1_acc_x', 'imu1_acc_y', 'imu1_acc_z']
|
||||
self.gyro_columns = ['imu1_gyro_x', 'imu1_gyro_y', 'imu1_gyro_z']
|
||||
self.temp_columns = ['imu1_temp']
|
||||
return
|
||||
|
||||
# 使用第一个检测到的IMU前缀
|
||||
imu_prefix = list(imu_prefixes)[0]
|
||||
self.log_progress(f"检测到IMU前缀: {imu_prefix}", "INFO")
|
||||
|
||||
# 查找加速度计列
|
||||
self.acc_columns = [col for col in all_columns
|
||||
if col.lower().startswith(f"{imu_prefix}_acc") and
|
||||
any(axis in col.lower() for axis in ['_x', '_y', '_z'])]
|
||||
|
||||
# 查找陀螺仪列
|
||||
self.gyro_columns = [col for col in all_columns
|
||||
if col.lower().startswith(f"{imu_prefix}_gyro") and
|
||||
any(axis in col.lower() for axis in ['_x', '_y', '_z'])]
|
||||
|
||||
# 查找温度列
|
||||
self.temp_columns = [col for col in all_columns
|
||||
if col.lower().startswith(f"{imu_prefix}_temp")]
|
||||
|
||||
# 如果没有找到温度列,尝试其他常见名称
|
||||
if not self.temp_columns:
|
||||
self.temp_columns = [col for col in all_columns
|
||||
if any(name in col.lower() for name in ['temp', 'temperature'])]
|
||||
|
||||
self.log_progress(f"加速度计列: {self.acc_columns}", "INFO")
|
||||
self.log_progress(f"陀螺仪列: {self.gyro_columns}", "INFO")
|
||||
self.log_progress(f"温度列: {self.temp_columns}", "INFO")
|
||||
|
||||
def estimate_sampling_rate(self):
|
||||
"""估计实际采样率"""
|
||||
if 'time' in self.data.columns and len(self.data) > 10:
|
||||
time_diff = np.diff(self.data['time'].values)
|
||||
valid_diffs = time_diff[(time_diff > 0) & (time_diff < 10)] # 排除异常值
|
||||
if len(valid_diffs) > 0:
|
||||
estimated_rate = 1.0 / np.median(valid_diffs)
|
||||
self.log_progress(f"根据时间戳估计的采样率: {estimated_rate:.2f} Hz")
|
||||
return estimated_rate
|
||||
|
||||
# 如果没有时间列或无法估计,使用基于文件名的默认值
|
||||
self.log_progress(f"使用基于文件名的默认采样率: {self.default_sampling_rate} Hz")
|
||||
return self.default_sampling_rate
|
||||
|
||||
    def load_data(self):
        """Load the CSV and prepare the data for analysis.

        Skips files whose header lacks IMU columns, detects the sensor
        columns, estimates the sampling rate, and (re)builds a uniform
        'time' column based on that rate.

        Raises:
            ValueError: If the file does not contain IMU data columns.
        """
        self.log_progress("开始加载数据...")
        start_time = time.time()

        # Cheap header check before paying for a full CSV parse.
        if not self.check_imu_columns_in_file():
            raise ValueError("文件不包含IMU数据列,跳过处理")

        # Lock serializes file reads across concurrent workers.
        with file_lock:
            self.data = pd.read_csv(self.file_path)

        self.log_progress(f"数据加载完成,共 {len(self.data)} 行,耗时 {time.time() - start_time:.2f}秒")

        # Detect IMU data columns.
        self.detect_imu_columns()

        # Estimate the sampling rate.
        self.sampling_rate = self.estimate_sampling_rate()

        # Rebuild the time axis: drop rows with implausible timestamps,
        # then regenerate 'time' as a uniform grid at sampling_rate.
        if 'time' in self.data.columns:
            valid_time_mask = (self.data['time'] > 0) & (self.data['time'] < 1e6)
            self.data = self.data[valid_time_mask].copy()
            self.data['time'] = np.arange(len(self.data)) / self.sampling_rate
        else:
            # No time column: synthesize one from the sampling rate.
            self.data['time'] = np.arange(len(self.data)) / self.sampling_rate
||||
def remove_dc(self, signal_data):
|
||||
"""不移除直流分量(保留以在频谱中显示 DC)"""
|
||||
return signal_data
|
||||
|
||||
    def compute_spectrogram(self, signal_data):
        """Compute a PSD spectrogram (DC kept), tuned for resolution and smoothness.

        Window length and overlap are chosen adaptively from the sampling
        rate and data length; the result is lightly Gaussian-smoothed to
        reduce graininess in the scatter plots. Also records the chosen
        parameters in self.spectrogram_params for the HTML report.

        Args:
            signal_data: 1-D array of samples.

        Returns:
            (f, t, Sxx): frequencies, segment times and the (possibly
            smoothed) power spectral density from scipy.signal.spectrogram.
        """
        # Keep the DC component (remove_dc is a deliberate no-op).
        signal_data = self.remove_dc(signal_data)

        # Data length.
        n_samples = len(signal_data)

        # Adaptive window parameters by sampling rate and data length.
        if self.sampling_rate <= 10:  # low sampling rate (~5 Hz)
            # Longer windows buy frequency resolution at low rates.
            nperseg = min(256, max(64, n_samples // 2))
            noverlap = int(nperseg * 0.75)  # heavier overlap

        else:  # high sampling rate (~1000 Hz)
            # Balance time resolution against frequency resolution.
            if n_samples < 10000:  # shorter recordings
                nperseg = min(512, max(256, n_samples // 4))
            else:  # longer recordings
                nperseg = min(1024, max(512, n_samples // 8))

            noverlap = int(nperseg * 0.66)  # moderate overlap

        # Clamp window sizes to sane values.
        nperseg = max(16, min(nperseg, n_samples))
        noverlap = min(noverlap, nperseg - 1)

        # Record the spectrogram parameters for the HTML report.
        self.spectrogram_params = {
            "nperseg": nperseg,
            "noverlap": noverlap,
            "window": "hamming",
            "detrend": False,
            "scaling": "density",
            "mode": "psd"
        }

        # Hamming window: smoother than Hann for this use case.
        f, t, Sxx = signal.spectrogram(
            signal_data,
            fs=self.sampling_rate,
            window='hamming',
            nperseg=nperseg,
            noverlap=noverlap,
            scaling='density',
            detrend=False,  # keep the DC component
            mode='psd'
        )

        # Light smoothing to reduce graininess (cosmetic step).
        if Sxx.size > 0:
            # Small-sigma Gaussian blur.
            from scipy.ndimage import gaussian_filter
            Sxx_smoothed = gaussian_filter(Sxx, sigma=0.7)
            return f, t, Sxx_smoothed

        return f, t, Sxx
||||
def process_signal(self, args):
|
||||
"""并行处理单个信号"""
|
||||
signal_data, axis = args
|
||||
f, t, Sxx = self.compute_spectrogram(signal_data)
|
||||
|
||||
# 防止 log10(0)
|
||||
eps = np.finfo(float).eps
|
||||
Sxx_log = 10 * np.log10(Sxx + eps)
|
||||
|
||||
# 降采样以加速绘图
|
||||
if len(t) > 1000: # 如果时间点太多,进行降采样
|
||||
time_indices = np.linspace(0, len(t) - 1, 1000, dtype=int)
|
||||
freq_indices = np.linspace(0, len(f) - 1, 500, dtype=int)
|
||||
t = t[time_indices]
|
||||
f = f[freq_indices]
|
||||
Sxx_log = Sxx_log[freq_indices, :][:, time_indices]
|
||||
dc_idx = int(np.argmin(np.abs(f - 0.0)))
|
||||
dc_log = Sxx_log[dc_idx, :] # shape: (len(t),)
|
||||
|
||||
# 更健壮的 0 Hz 索引选择
|
||||
zero_idx = np.where(np.isclose(f, 0.0))[0]
|
||||
dc_idx = int(zero_idx[0]) if len(zero_idx) > 0 else int(np.argmin(np.abs(f - 0.0)))
|
||||
dc_log = Sxx_log[dc_idx, :] # 每个时间窗的 0 Hz PSD(dB)
|
||||
|
||||
return {
|
||||
'f': f,
|
||||
't': t,
|
||||
'Sxx_log': Sxx_log,
|
||||
'dc_log': dc_log,
|
||||
'axis': axis
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def robust_dc_ylim(results, p_low=5, p_high=95, pad_ratio=0.05, fallback=(0.0, 1.0)):
|
||||
"""
|
||||
计算统一 DC 纵轴范围(分位数 + 少许边距),并过滤 inf/NaN
|
||||
"""
|
||||
if not results:
|
||||
return fallback
|
||||
dc_all = np.concatenate([r['dc_log'].ravel() for r in results])
|
||||
dc_all = dc_all[np.isfinite(dc_all)]
|
||||
if dc_all.size == 0:
|
||||
return fallback
|
||||
lo, hi = np.percentile(dc_all, [p_low, p_high])
|
||||
span = max(1e-9, hi - lo)
|
||||
lo -= span * pad_ratio
|
||||
hi += span * pad_ratio
|
||||
return lo, hi
|
||||
|
||||
def get_time_domain_stats(self):
|
||||
"""计算时域信号的统计信息"""
|
||||
stats = {}
|
||||
if self.acc_columns:
|
||||
stats['加速度计'] = {col: {
|
||||
'均值': self.data[col].mean(),
|
||||
'标准差': self.data[col].std(),
|
||||
'最大值': self.data[col].max(),
|
||||
'最小值': self.data[col].min()
|
||||
} for col in self.acc_columns}
|
||||
if self.gyro_columns:
|
||||
stats['陀螺仪'] = {col: {
|
||||
'均值': self.data[col].mean(),
|
||||
'标准差': self.data[col].std(),
|
||||
'最大值': self.data[col].max(),
|
||||
'最小值': self.data[col].min()
|
||||
} for col in self.gyro_columns}
|
||||
if self.temp_columns:
|
||||
stats['温度'] = {col: {
|
||||
'均值': self.data[col].mean(),
|
||||
'标准差': self.data[col].std(),
|
||||
'最大值': self.data[col].max(),
|
||||
'最小值': self.data[col].min()
|
||||
} for col in self.temp_columns}
|
||||
return stats
|
||||
|
||||
    def generate_html_report(self, time_domain_stats):
        """Write a self-contained HTML report into the output directory.

        The report embeds the time-domain statistics tables, the spectrogram
        parameters, and <img> links to the PNGs produced by the plotting
        methods (relative paths assume the report sits next to those files).

        Args:
            time_domain_stats: Nested dict from get_time_domain_stats().
        """
        html_content = f"""
        <!DOCTYPE html>
        <html lang="zh-CN">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>IMU数据分析报告 - {os.path.basename(self.file_path)}</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 20px; }}
                h1, h2, h3 {{ color: #333; }}
                table {{ border-collapse: collapse; width: 100%; margin-bottom: 20px; }}
                th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
                th {{ background-color: #f2f2f2; }}
                img {{ max-width: 100%; height: auto; display: block; margin: 10px 0; }}
            </style>
        </head>
        <body>
            <h1>IMU数据分析报告</h1>
            <p><strong>文件路径:</strong> {self.file_path}</p>
            <p><strong>分析时间:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
            <p><strong>采样率:</strong> {self.sampling_rate} Hz</p>

            <h2>时域信号统计信息</h2>
        """

        # Time-domain statistics: one table per sensor group.
        for sensor_type, sensors in time_domain_stats.items():
            html_content += f"<h3>{sensor_type}</h3>"
            html_content += "<table>"
            html_content += "<tr><th>传感器</th><th>均值</th><th>标准差</th><th>最大值</th><th>最小值</th></tr>"
            for col, stats in sensors.items():
                html_content += f"<tr><td>{col}</td><td>{stats['均值']:.4f}</td><td>{stats['标准差']:.4f}</td><td>{stats['最大值']:.4f}</td><td>{stats['最小值']:.4f}</td></tr>"
            html_content += "</table>"

        # Spectrogram computation parameters.
        html_content += """
        <h2>频域信号计算参数</h2>
        <table>
            <tr><th>参数</th><th>值</th></tr>
        """
        for key, value in self.spectrogram_params.items():
            html_content += f"<tr><td>{key}</td><td>{value}</td></tr>"
        html_content += "</table>"

        # Image links; file names must match what the plotting methods save.
        time_series_image = f'time_series_{self.timestamp}.png'
        acc_spectrogram_image = f'acc_rainfall_spectrogram_{self.timestamp}.png'
        gyro_spectrogram_image = f'gyro_rainfall_spectrogram_{self.timestamp}.png'

        html_content += f"""
        <h2>时域信号图</h2>
        <img src="{time_series_image}" alt="时域信号图">

        <h2>加速度计频谱雨点图</h2>
        <img src="{acc_spectrogram_image}" alt="加速度计频谱雨点图">

        <h2>陀螺仪频谱雨点图</h2>
        <img src="{gyro_spectrogram_image}" alt="陀螺仪频谱雨点图">
        """

        html_content += """
        </body>
        </html>
        """

        # Save the HTML report next to the images.
        report_path = os.path.join(self.output_dir, f'report_{self.timestamp}.html')
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        self.log_progress(f"HTML报告已生成: {report_path}")
||||
    def plot_time_series(self):
        """Plot stacked time-series panels (acc / gyro / temp) to a PNG."""
        self.log_progress("开始绘制时间序列图...")
        start_time = time.time()

        # One panel per available sensor group.
        n_plots = 1  # the accelerometer panel is always allotted
        if self.gyro_columns:  # gyroscope data present
            n_plots += 1
        if self.temp_columns:  # temperature data present
            n_plots += 1

        fig, axes = plt.subplots(n_plots, 1, figsize=(12, 3 * n_plots), dpi=120)
        if n_plots == 1:
            axes = [axes]  # normalize to a list for uniform indexing

        plot_idx = 0

        # Accelerometer panel.
        if self.acc_columns:
            ax = axes[plot_idx]
            colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
            labels = ['X', 'Y', 'Z']
            for i, col in enumerate(self.acc_columns):
                if i < 3:  # only the first three axes
                    ax.plot(self.data['time'], self.data[col],
                            label=labels[i], color=colors[i], linewidth=1.0, alpha=0.8)
            ax.set_title('加速度时间序列', fontsize=12)
            ax.set_ylabel('加速度 (g)', fontsize=10)
            ax.legend(loc='upper right', fontsize=8, framealpha=0.5)
            ax.grid(True, linestyle=':', alpha=0.5)
            ax.set_xlim(0, self.data['time'].max())
            plot_idx += 1

        # Gyroscope panel (if present).
        if self.gyro_columns and plot_idx < n_plots:
            ax = axes[plot_idx]
            colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
            labels = ['X', 'Y', 'Z']
            for i, col in enumerate(self.gyro_columns):
                if i < 3:  # only the first three axes
                    ax.plot(self.data['time'], self.data[col],
                            label=labels[i], color=colors[i], linewidth=1.0, alpha=0.8)
            ax.set_title('陀螺仪时间序列', fontsize=12)
            ax.set_ylabel('角速度 (deg/s)', fontsize=10)
            ax.legend(loc='upper left', fontsize=8, framealpha=0.5)
            ax.grid(True, linestyle=':', alpha=0.5)
            ax.set_xlim(0, self.data['time'].max())
            plot_idx += 1

        # Temperature panel (if present).
        if self.temp_columns and plot_idx < n_plots:
            ax = axes[plot_idx]
            ax.plot(self.data['time'], self.data[self.temp_columns[0]],
                    label='温度', color='#9467bd', linewidth=1.0, alpha=0.8)
            ax.set_title('温度时间序列', fontsize=12)
            ax.set_xlabel('时间 (s)', fontsize=10)
            ax.set_ylabel('温度 (°C)', fontsize=10)
            ax.legend(loc='upper right', fontsize=8, framealpha=0.5)
            ax.grid(True, linestyle=':', alpha=0.5)
            ax.set_xlim(0, self.data['time'].max())

        plt.tight_layout()
        output_path = os.path.join(self.output_dir, f'time_series_{self.timestamp}.png')
        plt.savefig(output_path, bbox_inches='tight', dpi=150)
        plt.close(fig)
        # NOTE(review): the save path is logged twice below; probably only
        # one of these lines was intended.
        self.log_progress(f"时间序列图已保存: {output_path}")
        self.log_progress(f"时间序列图已保存为 {output_path},耗时 {time.time() - start_time:.2f}秒")
||||
    def plot_rainfall_spectrograms(self):
        """Render accelerometer and gyroscope "rainfall" spectrograms.

        Spectrograms for all axes are computed in parallel worker
        processes, share one colour scale per sensor group (5th–95th
        percentile of PSD dB) and one DC y-range, and are then drawn by
        _plot_single_spectrogram.
        """
        self.log_progress("开始并行绘制频谱雨点图...")
        start_time = time.time()

        # Accelerometer signals (first three axes only).
        self.log_progress("准备加速度计数据...")
        acc_signals = [(self.data[col], f'Acc {["X", "Y", "Z"][i]}')
                       for i, col in enumerate(self.acc_columns) if i < 3]

        # Gyroscope signals, when present (first three axes only).
        gyro_signals = []
        if self.gyro_columns:
            self.log_progress("准备陀螺仪数据...")
            gyro_signals = [(self.data[col], f'Gyro {["X", "Y", "Z"][i]}')
                            for i, col in enumerate(self.gyro_columns) if i < 3]

        # Nothing to do if no usable columns exist.
        if not acc_signals and not gyro_signals:
            self.log_progress("警告: 没有有效的数据列可供处理", "WARNING")
            return

        # Process pool (not threads) sidesteps matplotlib threading issues.
        self.log_progress("使用多进程并行处理...")
        all_signals = acc_signals + gyro_signals
        with Pool(processes=min(len(all_signals), cpu_count())) as pool:
            results = pool.map(self.process_signal, all_signals)

        # Split the results back into sensor groups by axis label.
        self.log_progress("分离结果...")
        acc_results = [r for r in results if r['axis'].startswith('Acc')]
        gyro_results = [r for r in results if r['axis'].startswith('Gyro')]

        # Shared colour scale per sensor group (5th–95th percentile).
        if acc_results:
            self.log_progress("计算加速度计全局最小和最大值...")
            acc_all_Sxx = np.concatenate([r['Sxx_log'].ravel() for r in acc_results])
            acc_vmin, acc_vmax = np.percentile(acc_all_Sxx, [5, 95])

            # Shared DC y-axis range across accelerometer axes.
            acc_dc_ymin, acc_dc_ymax = self.robust_dc_ylim(acc_results)
            self.log_progress(f"加速度 DC (dB) 范围: {acc_dc_ymin:.1f} 到 {acc_dc_ymax:.1f}")

        if gyro_results:
            self.log_progress("计算陀螺仪全局最小和最大值...")
            gyro_all_Sxx = np.concatenate([r['Sxx_log'].ravel() for r in gyro_results])
            gyro_vmin, gyro_vmax = np.percentile(gyro_all_Sxx, [5, 95])

            # Shared DC y-axis range across gyroscope axes.
            gyro_dc_ymin, gyro_dc_ymax = self.robust_dc_ylim(gyro_results)
            self.log_progress(f"陀螺仪 DC (dB) 范围: {gyro_dc_ymin:.1f} 到 {gyro_dc_ymax:.1f}")

        # ========= Accelerometer rainfall spectrogram =========
        if acc_results:
            self._plot_single_spectrogram(acc_results, acc_vmin, acc_vmax, acc_dc_ymin, acc_dc_ymax,
                                          '加速度', 'acc_rainfall_spectrogram')
            self.log_progress(f"加速度功率谱密度范围: {acc_vmin:.1f} dB 到 {acc_vmax:.1f} dB")

        # ========= Gyroscope rainfall spectrogram =========
        if gyro_results:
            self._plot_single_spectrogram(gyro_results, gyro_vmin, gyro_vmax, gyro_dc_ymin, gyro_dc_ymax,
                                          '角速度', 'gyro_rainfall_spectrogram')
            self.log_progress(f"陀螺仪功率谱密度范围: {gyro_vmin:.1f} dB 到 {gyro_vmax:.1f} dB")

        total_time = time.time() - start_time
        self.log_progress(f"频谱雨点图生成完成,总耗时 {total_time:.2f}秒")
||||
    def _plot_single_spectrogram(self, results, vmin, vmax, dc_ymin, dc_ymax, title_prefix, filename_prefix):
        """Draw one rainfall-spectrogram figure (one row per axis) and save it.

        Each row shows a scatter "rainfall" spectrogram with its own
        colorbar column (dedicated gridspec column avoids colorbar layout
        clashes) plus a twin y-axis carrying the DC (0 Hz) dB trace.

        Args:
            results: List of per-axis dicts from process_signal.
            vmin, vmax: Shared colour-scale limits (dB).
            dc_ymin, dc_ymax: Shared DC trace y-limits (dB).
            title_prefix: Sensor label used in row titles (e.g. '加速度').
            filename_prefix: Base name of the output PNG.
        """
        rows = len(results)
        fig = plt.figure(constrained_layout=True, figsize=(14, 4 * rows), dpi=150)
        # Column 0: spectrogram axes; column 1: dedicated colorbar axes.
        gs = fig.add_gridspec(nrows=rows, ncols=2, width_ratios=[22, 1], wspace=0.05, hspace=0.12)

        axes_main = []
        axes_cbar = []
        for i in range(rows):
            axes_main.append(fig.add_subplot(gs[i, 0]))
            axes_cbar.append(fig.add_subplot(gs[i, 1]))

        for i, result in enumerate(results):
            ax = axes_main[i]
            cax = axes_cbar[i]

            # Scatter of (time, frequency) points coloured by PSD in dB;
            # rasterized to keep the saved file size manageable.
            sc = ax.scatter(
                np.repeat(result['t'], len(result['f'])),
                np.tile(result['f'], len(result['t'])),
                c=result['Sxx_log'].T.ravel(),
                cmap='jet',
                s=3,
                alpha=0.7,
                vmin=vmin,
                vmax=vmax,
                rasterized=True
            )

            ax.set_title(f'{title_prefix}频谱雨点图 - {result["axis"][-1]}(右侧为DC分量 dB)', fontsize=10)
            ax.set_xlabel('时间 (s)', fontsize=9)
            ax.set_ylabel('频率 (Hz)', fontsize=9)
            ax.set_ylim(0, self.sampling_rate / 2)  # up to the Nyquist frequency
            ax.grid(True, linestyle=':', alpha=0.4)

            # Twin axis: DC (0 Hz) power trace over time.
            ax2 = ax.twinx()
            ax2.plot(result['t'], result['dc_log'], color='black', linewidth=1.2, alpha=0.85, label='DC (dB)')
            ax2.set_ylabel('直流分量 (dB)', fontsize=9, color='black')
            ax2.set_ylim(dc_ymin, dc_ymax)
            ax2.tick_params(axis='y', labelcolor='black')
            ax2.yaxis.set_major_locator(MaxNLocator(nbins=6))
            ax2.grid(False)
            ax2.legend(loc='upper right', fontsize=8, framealpha=0.5)

            # Colorbar lives in its own gridspec cell.
            cbar = fig.colorbar(sc, cax=cax)
            cbar.set_label('功率谱密度 (dB)', fontsize=9)
            cax.tick_params(labelsize=8)

        output_path = os.path.join(self.output_dir, f'{filename_prefix}_{self.timestamp}.png')
        plt.savefig(output_path, bbox_inches='tight', dpi=150)
        plt.close(fig)
        self.log_progress(f"{title_prefix}频谱雨点图已保存为 {output_path}")
||||
    def run_analysis(self):
        """Run the full pipeline: load, plot, summarize, report.

        Returns:
            True on success; False when the file is skipped (no IMU
            columns) or any other error occurs.
        """
        try:
            self.log_progress("开始数据分析流程", "INFO")
            start_time = time.time()

            self.load_data()
            self.plot_time_series()
            self.plot_rainfall_spectrograms()

            # Time-domain summary statistics.
            time_domain_stats = self.get_time_domain_stats()

            # HTML report ties together the stats and generated images.
            self.generate_html_report(time_domain_stats)

            total_time = time.time() - start_time
            self.log_progress(f"分析完成,总耗时 {total_time:.2f}秒", "SUCCESS")
            self.log_progress(f"所有输出文件已保存到: {self.output_dir}", "INFO")
            return True

        except ValueError as e:
            # load_data raises ValueError for files without IMU columns.
            self.log_progress(f"跳过文件: {str(e)}", "WARNING")
            return False
        except Exception as e:
            self.log_progress(f"分析过程中出现错误: {str(e)}", "ERROR")
            import traceback
            traceback.print_exc()
            return False
||||
|
||||
def process_single_file(file_path):
    """Analyze one CSV file inside an isolated worker process.

    Args:
        file_path: Path of the CSV file to process.

    Returns:
        Tuple (file_path, success_flag, message) suitable for collection
        by the parent process.
    """
    try:
        print(f"{Fore.BLUE}开始处理文件: {os.path.basename(file_path)}")
        worker = IMUDataAnalyzer(file_path)
        if worker.run_analysis():
            return (file_path, True, "处理成功")
        return (file_path, False, "文件不包含IMU数据,已跳过")
    except Exception as exc:
        return (file_path, False, str(exc))
|
||||
def main():
    """Entry point: batch-process every IMU CSV under a user-given path.

    Prompts for a directory (or single file), finds CSV files whose path
    contains "imu" (case-insensitive), fans the work out over a process
    pool, and prints a success/skip/failure summary.
    """
    print("=" * 60)
    print(f"{Fore.CYAN}IMU数据频谱分析工具 - 多文件批量处理")
    print("=" * 60)

    # Ask the user for the input path.
    print(f"{Fore.WHITE}请输入包含CSV文件的目录路径: ")
    input_path = input("> ").strip()

    if not os.path.exists(input_path):
        print(f"{Fore.RED}错误: 路径 '{input_path}' 不存在!")
        return

    # Collect CSV files whose path mentions "imu" (case-insensitive).
    if os.path.isdir(input_path):
        # One recursive glob, then filter for "imu" in the path.
        all_csv_files = glob.glob(os.path.join(input_path, "**", "*.csv"), recursive=True)
        csv_files = [f for f in all_csv_files if re.search(r'imu', f, re.IGNORECASE)]
        csv_files = list(set(csv_files))  # de-duplicate
        csv_files.sort()
    else:
        # Single file: accept it only if its path mentions "imu".
        if re.search(r'imu', input_path, re.IGNORECASE):
            csv_files = [input_path]
        else:
            csv_files = []

    if not csv_files:
        print(f"{Fore.YELLOW}警告: 未找到包含'imu'的CSV文件")
        return

    print(f"{Fore.GREEN}找到 {len(csv_files)} 个IMU数据文件:")
    for i, file in enumerate(csv_files, 1):
        print(f"  {i}. {os.path.basename(file)}")

    # Worker processes (not threads) avoid matplotlib threading conflicts.
    # NOTE(review): the user-facing messages below say "线程" (threads) but
    # the code actually uses processes.
    print(f"\n{Fore.CYAN}开始多线程处理文件 (使用 {min(len(csv_files), cpu_count())} 个线程)...")

    success_count = 0
    skipped_count = 0
    failed_files = []

    # ProcessPoolExecutor, deliberately not ThreadPoolExecutor.
    with ProcessPoolExecutor(max_workers=min(len(csv_files), cpu_count())) as executor:
        # Submit all the work up front.
        future_to_file = {executor.submit(process_single_file, file): file for file in csv_files}

        # Collect results as workers finish.
        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                result = future.result()
                file_path, success, message = result
                if success:
                    print(f"{Fore.GREEN}✓ 完成: {os.path.basename(file_path)}")
                    success_count += 1
                else:
                    # "跳过" in the message marks a non-IMU file, not an error.
                    if "跳过" in message:
                        print(f"{Fore.YELLOW}↷ 跳过: {os.path.basename(file_path)} - {message}")
                        skipped_count += 1
                    else:
                        print(f"{Fore.RED}✗ 失败: {os.path.basename(file_path)} - {message}")
                        failed_files.append((file_path, message))
            except Exception as e:
                print(f"{Fore.RED}✗ 异常: {os.path.basename(file_path)} - {str(e)}")
                failed_files.append((file_path, str(e)))

    # Final summary.
    print(f"\n{Fore.CYAN}处理完成统计:")
    print(f"{Fore.GREEN}成功: {success_count} 个文件")
    print(f"{Fore.YELLOW}跳过: {skipped_count} 个文件(不包含IMU数据)")
    print(f"{Fore.RED}失败: {len(failed_files)} 个文件")

    if failed_files:
        print(f"\n{Fore.YELLOW}失败文件详情:")
        for file, error in failed_files:
            print(f"  {os.path.basename(file)}: {error}")
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print(f"\n{Fore.YELLOW}用户中断程序执行")
|
||||
except Exception as e:
|
||||
print(f"{Fore.RED}程序运行出错: {str(e)}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
648
FFT_IMU/FFT_IMU_dc_scan_v1.py
Normal file
648
FFT_IMU/FFT_IMU_dc_scan_v1.py
Normal file
@@ -0,0 +1,648 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import signal
|
||||
import os
|
||||
import glob
|
||||
from datetime import datetime
|
||||
import time
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from matplotlib.colors import Normalize
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
import re
|
||||
from colorama import Fore, Style, init
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
import warnings
|
||||
import threading
|
||||
|
||||
# Initialize colorama so ANSI colours render on Windows consoles;
# autoreset restores the default style after every print call.
init(autoreset=True)

# Silence known-noisy matplotlib warnings.
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
warnings.filterwarnings("ignore", category=FutureWarning, module="matplotlib")

# Locks that keep file operations and log output thread-safe.
file_lock = threading.Lock()
log_lock = threading.Lock()
|
||||
|
||||
class IMUDataAnalyzer:
|
||||
    def __init__(self, file_path):
        """Set up the analyzer for one CSV file.

        Infers the data type and default sampling rate from the file name,
        creates a timestamped output directory next to the input file, and
        configures matplotlib fonts / layout options (note: rcParams are
        process-global side effects).

        Args:
            file_path: Path to the CSV file to analyze.
        """
        self.file_path = file_path
        self.data = None           # pandas DataFrame, filled by load_data()
        self.sampling_rate = None  # Hz, set in load_data()
        self.fig_size = (15, 10)

        # Infer data type and default sampling rate from the file name.
        file_name = os.path.basename(file_path).lower()
        if 'calib' in file_name:
            self.data_type = 'calib'
            self.default_sampling_rate = 5
        elif 'raw' in file_name:
            self.data_type = 'raw'
            self.default_sampling_rate = 1000
        else:
            self.data_type = 'unknown'
            self.default_sampling_rate = 5

        # Resolve the input directory and base name.
        file_dir = os.path.dirname(os.path.abspath(file_path))
        file_base_name = os.path.splitext(os.path.basename(file_path))[0]
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Output directory: "<base name>_output_<timestamp>", beside the input.
        self.output_dir = os.path.join(file_dir, f"{file_base_name}_output_{self.timestamp}")

        # Lock guards directory creation when several workers start at once.
        with file_lock:
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)
                self.log_progress(f"创建输出目录:{self.output_dir}", "INFO")

        # Font setup (CJK-capable fonts first so Chinese labels render).
        plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'Arial']
        plt.rcParams['axes.unicode_minus'] = False

        # Compatibility options to avoid layout-engine conflicts.
        plt.rcParams['figure.constrained_layout.use'] = False
        plt.rcParams['figure.constrained_layout.h_pad'] = 0.02
        plt.rcParams['figure.constrained_layout.w_pad'] = 0.02
        plt.rcParams['figure.constrained_layout.hspace'] = 0.02
        plt.rcParams['figure.constrained_layout.wspace'] = 0.02

        self.log_progress(f"处理文件:{self.file_path}", "INFO")
        self.log_progress(f"数据类型:{self.data_type}", "INFO")
        self.log_progress(f"输出路径:{self.output_dir}", "INFO")
|
||||
    def log_progress(self, message, level="INFO"):
        """Print a timestamped, colour-coded log line (thread-safe).

        Args:
            message: Text to log.
            level: "INFO", "WARNING", "ERROR", "SUCCESS", or anything else
                for an uncoloured, untagged line.
        """
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        # Serialize prints so concurrent workers do not interleave lines.
        with log_lock:
            if level == "INFO":
                print(f"{Fore.CYAN}[{timestamp}] {Fore.GREEN}{message}")
            elif level == "WARNING":
                print(f"{Fore.CYAN}[{timestamp}] {Fore.YELLOW}警告: {message}")
            elif level == "ERROR":
                print(f"{Fore.CYAN}[{timestamp}] {Fore.RED}错误: {message}")
            elif level == "SUCCESS":
                print(f"{Fore.CYAN}[{timestamp}] {Fore.GREEN}✓ {message}")
            else:
                print(f"{Fore.CYAN}[{timestamp}] {message}")
||||
    def check_imu_columns_in_file(self):
        """Cheaply test whether the CSV header mentions IMU columns.

        Reads only the first line of the file and searches it for the
        substring "imu" (case-insensitive).

        Returns:
            True if the header looks like IMU data, False otherwise
            (including on any read error).
        """
        try:
            # Only the header line is needed for this check.
            with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
                first_line = f.readline().strip()

            # Case-insensitive "imu" keyword check on the header.
            if re.search(r'imu', first_line, re.IGNORECASE):
                return True
            else:
                self.log_progress(f"文件头部不包含'imu'关键词,跳过处理,first_line {first_line}", "WARNING")
                return False

        except Exception as e:
            self.log_progress(f"检查文件头部时出错: {str(e)}", "ERROR")
            return False
|
||||
    def detect_imu_columns(self):
        """Discover accelerometer / gyroscope / temperature columns.

        Scans self.data's column names for an "imuN_" prefix; on success
        fills self.acc_columns, self.gyro_columns and self.temp_columns,
        otherwise falls back to conventional imu1_* defaults.
        """
        all_columns = self.data.columns.tolist()

        # Collect every "imuN" prefix that appears (imu1, imu2, ...).
        imu_prefixes = set()
        for col in all_columns:
            match = re.match(r'^(imu\d+)_', col.lower())
            if match:
                imu_prefixes.add(match.group(1))

        if not imu_prefixes:
            self.log_progress("未检测到IMU数据列,尝试使用默认列名", "WARNING")
            # Fall back to the conventional default column names.
            self.acc_columns = ['imu1_acc_x', 'imu1_acc_y', 'imu1_acc_z']
            self.gyro_columns = ['imu1_gyro_x', 'imu1_gyro_y', 'imu1_gyro_z']
            self.temp_columns = ['imu1_temp']
            return

        # Only the first detected prefix is used.
        imu_prefix = list(imu_prefixes)[0]
        self.log_progress(f"检测到IMU前缀: {imu_prefix}", "INFO")

        # Accelerometer columns: "<prefix>_acc*" naming an axis.
        self.acc_columns = [col for col in all_columns
                            if col.lower().startswith(f"{imu_prefix}_acc") and
                            any(axis in col.lower() for axis in ['_x', '_y', '_z'])]

        # Gyroscope columns: "<prefix>_gyro*" naming an axis.
        self.gyro_columns = [col for col in all_columns
                             if col.lower().startswith(f"{imu_prefix}_gyro") and
                             any(axis in col.lower() for axis in ['_x', '_y', '_z'])]

        # Temperature columns: prefer "<prefix>_temp*".
        self.temp_columns = [col for col in all_columns
                             if col.lower().startswith(f"{imu_prefix}_temp")]

        # If none match, accept any column with a temp-like name.
        if not self.temp_columns:
            self.temp_columns = [col for col in all_columns
                                 if any(name in col.lower() for name in ['temp', 'temperature'])]

        self.log_progress(f"加速度计列: {self.acc_columns}", "INFO")
        self.log_progress(f"陀螺仪列: {self.gyro_columns}", "INFO")
        self.log_progress(f"温度列: {self.temp_columns}", "INFO")
|
||||
def estimate_sampling_rate(self):
    """Estimate the effective sampling rate from the 'time' column.

    Uses the median of the positive, sub-10-second timestamp deltas;
    falls back to the filename-derived default when no usable deltas
    exist or there is no time column.
    """
    if 'time' in self.data.columns and len(self.data) > 10:
        deltas = np.diff(self.data['time'].values)
        # Drop non-monotonic samples and absurdly large gaps.
        usable = deltas[(deltas > 0) & (deltas < 10)]
        if usable.size:
            rate = 1.0 / np.median(usable)
            self.log_progress(f"根据时间戳估计的采样率: {rate:.2f} Hz")
            return rate

    # No time column, or nothing usable: use the default inferred from
    # the file name in __init__.
    self.log_progress(f"使用基于文件名的默认采样率: {self.default_sampling_rate} Hz")
    return self.default_sampling_rate
|
||||
|
||||
def load_data(self):
    """Load the CSV and prepare it for analysis.

    Raises ValueError when the file header does not mention IMU columns,
    which the caller (run_analysis) treats as "skip this file".
    Side effects: sets self.data, self.sampling_rate and the detected
    column lists, and rewrites self.data['time'] as a uniform time axis.
    """
    self.log_progress("开始加载数据...")
    start_time = time.time()

    # Cheap header pre-check before paying for a full CSV parse.
    if not self.check_imu_columns_in_file():
        raise ValueError("文件不包含IMU数据列,跳过处理")

    # Module-level lock serializes file reads across worker threads.
    with file_lock:
        self.data = pd.read_csv(self.file_path)

    self.log_progress(f"数据加载完成,共 {len(self.data)} 行,耗时 {time.time() - start_time:.2f}秒")

    # Detect accelerometer / gyro / temperature columns.
    self.detect_imu_columns()

    # Estimate the sampling rate (must happen before the time rewrite below).
    self.sampling_rate = self.estimate_sampling_rate()

    # Rebuild the time axis: drop rows with out-of-range timestamps, then
    # replace 'time' with a uniform grid at the estimated sampling rate.
    if 'time' in self.data.columns:
        valid_time_mask = (self.data['time'] > 0) & (self.data['time'] < 1e6)
        self.data = self.data[valid_time_mask].copy()
        self.data['time'] = np.arange(len(self.data)) / self.sampling_rate
    else:
        # No time column: synthesize one from the sampling rate.
        self.data['time'] = np.arange(len(self.data)) / self.sampling_rate
|
||||
|
||||
def remove_dc(self, signal_data):
    """Identity pass-through.

    The DC component is deliberately NOT removed so that it remains
    visible in the spectrogram; this hook exists so removal could be
    re-enabled in one place.
    """
    return signal_data
|
||||
|
||||
# def compute_spectrogram(self, signal_data):
|
||||
# """计算频谱图(保留直流分量)"""
|
||||
# # 保留直流分量
|
||||
# signal_data = self.remove_dc(signal_data)
|
||||
#
|
||||
# # 自适应窗口大小 - 根据采样率调整
|
||||
# if self.sampling_rate <= 10: # 低采样率
|
||||
# nperseg = min(64, max(16, len(signal_data) // 4))
|
||||
# else: # 高采样率
|
||||
# nperseg = min(1024, max(64, len(signal_data) // 8))
|
||||
#
|
||||
# noverlap = nperseg // 2
|
||||
#
|
||||
# f, t, Sxx = signal.spectrogram(
|
||||
# signal_data,
|
||||
# fs=self.sampling_rate,
|
||||
# window='hann',
|
||||
# nperseg=nperseg,
|
||||
# noverlap=noverlap,
|
||||
# scaling='density',
|
||||
# detrend=False, # 保留直流
|
||||
# mode='psd' # 更高效的模式
|
||||
# )
|
||||
# return f, t, Sxx
|
||||
|
||||
def compute_spectrogram(self, signal_data):
|
||||
"""计算频谱图(保留直流分量),优化频谱分辨率和减少颗粒感"""
|
||||
# 保留直流分量
|
||||
signal_data = self.remove_dc(signal_data)
|
||||
|
||||
# 数据长度
|
||||
n_samples = len(signal_data)
|
||||
|
||||
# 根据采样率和数据长度自适应选择参数
|
||||
if self.sampling_rate <= 10: # 低采样率(5Hz)
|
||||
# 对于低采样率,使用较长的窗口以获得更好的频率分辨率
|
||||
nperseg = min(256, max(64, n_samples // 2))
|
||||
noverlap = int(nperseg * 0.75) # 增加重叠比例
|
||||
|
||||
else: # 高采样率(1000Hz)
|
||||
# 对于高采样率,平衡时间分辨率和频率分辨率
|
||||
if n_samples < 10000: # 较短的数据
|
||||
nperseg = min(512, max(256, n_samples // 4))
|
||||
else: # 较长的数据
|
||||
nperseg = min(1024, max(512, n_samples // 8))
|
||||
|
||||
noverlap = int(nperseg * 0.66) # 适中的重叠比例
|
||||
|
||||
# 确保窗口大小合理
|
||||
nperseg = max(16, min(nperseg, n_samples))
|
||||
noverlap = min(noverlap, nperseg - 1)
|
||||
|
||||
# 使用更平滑的窗口函数
|
||||
f, t, Sxx = signal.spectrogram(
|
||||
signal_data,
|
||||
fs=self.sampling_rate,
|
||||
window='hamming', # 使用汉明窗,比汉宁窗更平滑
|
||||
nperseg=nperseg,
|
||||
noverlap=noverlap,
|
||||
scaling='density',
|
||||
# detrend='linear', # 使用线性去趋势,减少低频干扰
|
||||
detrend=False, # 保留直流
|
||||
mode='psd'
|
||||
)
|
||||
|
||||
# 应用平滑处理以减少颗粒感
|
||||
if Sxx.size > 0:
|
||||
# 使用小范围的高斯滤波平滑(可选)
|
||||
from scipy.ndimage import gaussian_filter
|
||||
Sxx_smoothed = gaussian_filter(Sxx, sigma=0.7)
|
||||
return f, t, Sxx_smoothed
|
||||
|
||||
return f, t, Sxx
|
||||
|
||||
def process_signal(self, args):
|
||||
"""并行处理单个信号"""
|
||||
signal_data, axis = args
|
||||
f, t, Sxx = self.compute_spectrogram(signal_data)
|
||||
|
||||
# 防止 log10(0)
|
||||
eps = np.finfo(float).eps
|
||||
Sxx_log = 10 * np.log10(Sxx + eps)
|
||||
|
||||
# 降采样以加速绘图
|
||||
if len(t) > 1000: # 如果时间点太多,进行降采样
|
||||
time_indices = np.linspace(0, len(t) - 1, 1000, dtype=int)
|
||||
freq_indices = np.linspace(0, len(f) - 1, 500, dtype=int)
|
||||
t = t[time_indices]
|
||||
f = f[freq_indices]
|
||||
Sxx_log = Sxx_log[freq_indices, :][:, time_indices]
|
||||
dc_idx = int(np.argmin(np.abs(f - 0.0)))
|
||||
dc_log = Sxx_log[dc_idx, :] # shape: (len(t),)
|
||||
|
||||
# 更健壮的 0 Hz 索引选择
|
||||
zero_idx = np.where(np.isclose(f, 0.0))[0]
|
||||
dc_idx = int(zero_idx[0]) if len(zero_idx) > 0 else int(np.argmin(np.abs(f - 0.0)))
|
||||
dc_log = Sxx_log[dc_idx, :] # 每个时间窗的 0 Hz PSD(dB)
|
||||
|
||||
return {
|
||||
'f': f,
|
||||
't': t,
|
||||
'Sxx_log': Sxx_log,
|
||||
'dc_log': dc_log,
|
||||
'axis': axis
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def robust_dc_ylim(results, p_low=5, p_high=95, pad_ratio=0.05, fallback=(0.0, 1.0)):
|
||||
"""
|
||||
计算统一 DC 纵轴范围(分位数 + 少许边距),并过滤 inf/NaN
|
||||
"""
|
||||
if not results:
|
||||
return fallback
|
||||
dc_all = np.concatenate([r['dc_log'].ravel() for r in results])
|
||||
dc_all = dc_all[np.isfinite(dc_all)]
|
||||
if dc_all.size == 0:
|
||||
return fallback
|
||||
lo, hi = np.percentile(dc_all, [p_low, p_high])
|
||||
span = max(1e-9, hi - lo)
|
||||
lo -= span * pad_ratio
|
||||
hi += span * pad_ratio
|
||||
return lo, hi
|
||||
|
||||
def plot_time_series(self):
    """Plot stacked time-series panels and save them as a single PNG.

    Always draws an acceleration panel; adds gyroscope and temperature
    panels when those columns were detected. The figure is written to
    the run's output directory.
    """
    self.log_progress("开始绘制时间序列图...")
    start_time = time.time()

    # Panel count: acceleration always, gyro/temperature when present.
    n_plots = 1
    if self.gyro_columns:
        n_plots += 1
    if self.temp_columns:
        n_plots += 1

    fig, axes = plt.subplots(n_plots, 1, figsize=(12, 3 * n_plots), dpi=120)
    if n_plots == 1:
        axes = [axes]  # plt.subplots returns a bare Axes for a single row

    plot_idx = 0

    # Accelerometer panel (first three axes only).
    if self.acc_columns:
        ax = axes[plot_idx]
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
        labels = ['X', 'Y', 'Z']
        for i, col in enumerate(self.acc_columns):
            if i < 3:
                ax.plot(self.data['time'], self.data[col],
                        label=labels[i], color=colors[i], linewidth=1.0, alpha=0.8)
        ax.set_title('加速度时间序列', fontsize=12)
        ax.set_ylabel('加速度 (g)', fontsize=10)
        ax.legend(loc='upper right', fontsize=8, framealpha=0.5)
        ax.grid(True, linestyle=':', alpha=0.5)
        ax.set_xlim(0, self.data['time'].max())
        plot_idx += 1

    # Gyroscope panel (first three axes only).
    if self.gyro_columns and plot_idx < n_plots:
        ax = axes[plot_idx]
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
        labels = ['X', 'Y', 'Z']
        for i, col in enumerate(self.gyro_columns):
            if i < 3:
                ax.plot(self.data['time'], self.data[col],
                        label=labels[i], color=colors[i], linewidth=1.0, alpha=0.8)
        ax.set_title('陀螺仪时间序列', fontsize=12)
        ax.set_ylabel('角速度 (deg/s)', fontsize=10)
        ax.legend(loc='upper left', fontsize=8, framealpha=0.5)
        ax.grid(True, linestyle=':', alpha=0.5)
        ax.set_xlim(0, self.data['time'].max())
        plot_idx += 1

    # Temperature panel (first detected temperature column only).
    if self.temp_columns and plot_idx < n_plots:
        ax = axes[plot_idx]
        ax.plot(self.data['time'], self.data[self.temp_columns[0]],
                label='温度', color='#9467bd', linewidth=1.0, alpha=0.8)
        ax.set_title('温度时间序列', fontsize=12)
        ax.set_xlabel('时间 (s)', fontsize=10)
        ax.set_ylabel('温度 (°C)', fontsize=10)
        ax.legend(loc='upper right', fontsize=8, framealpha=0.5)
        ax.grid(True, linestyle=':', alpha=0.5)
        ax.set_xlim(0, self.data['time'].max())

    plt.tight_layout()
    output_path = os.path.join(self.output_dir, f'time_series_{self.timestamp}.png')
    plt.savefig(output_path, bbox_inches='tight', dpi=150)
    plt.close(fig)
    # Fix: the original emitted two near-identical "saved" log lines here;
    # keep the single, more informative one (path + elapsed time).
    self.log_progress(f"时间序列图已保存为 {output_path},耗时 {time.time() - start_time:.2f}秒")
|
||||
|
||||
def plot_rainfall_spectrograms(self):
    """Compute per-axis spectrograms in parallel and plot them as
    scatter ("rainfall") figures, one for the accelerometer and one for
    the gyroscope.

    Uses a multiprocessing Pool (not threads) so matplotlib/scipy work
    does not collide across workers. Color scale and DC y-range are
    shared across axes of the same sensor via 5%-95% percentiles.
    """
    self.log_progress("开始并行绘制频谱雨点图...")
    start_time = time.time()

    # Accelerometer signals: (series, label) pairs, first three axes only.
    self.log_progress("准备加速度计数据...")
    acc_signals = [(self.data[col], f'Acc {["X", "Y", "Z"][i]}')
                   for i, col in enumerate(self.acc_columns) if i < 3]

    # Gyroscope signals (optional), same shape.
    gyro_signals = []
    if self.gyro_columns:
        self.log_progress("准备陀螺仪数据...")
        gyro_signals = [(self.data[col], f'Gyro {["X", "Y", "Z"][i]}')
                        for i, col in enumerate(self.gyro_columns) if i < 3]

    # Nothing to do: bail out early.
    if not acc_signals and not gyro_signals:
        self.log_progress("警告: 没有有效的数据列可供处理", "WARNING")
        return

    # Fan the spectrogram computation out over worker processes.
    self.log_progress("使用多进程并行处理...")
    all_signals = acc_signals + gyro_signals
    with Pool(processes=min(len(all_signals), cpu_count())) as pool:
        results = pool.map(self.process_signal, all_signals)

    # Split results back by sensor using the axis-label prefix.
    self.log_progress("分离结果...")
    acc_results = [r for r in results if r['axis'].startswith('Acc')]
    gyro_results = [r for r in results if r['axis'].startswith('Gyro')]

    # Shared color scale per sensor (5%-95% percentiles of the log-PSD).
    # NOTE: acc_vmin/acc_vmax/acc_dc_ymin/acc_dc_ymax are only defined when
    # acc_results is non-empty; the later plotting block is guarded by the
    # same condition, so they are never read undefined (same for gyro).
    if acc_results:
        self.log_progress("计算加速度计全局最小和最大值...")
        acc_all_Sxx = np.concatenate([r['Sxx_log'].ravel() for r in acc_results])
        acc_vmin, acc_vmax = np.percentile(acc_all_Sxx, [5, 95])

        # Shared DC-trace y-range.
        acc_dc_ymin, acc_dc_ymax = self.robust_dc_ylim(acc_results)
        self.log_progress(f"加速度 DC (dB) 范围: {acc_dc_ymin:.1f} 到 {acc_dc_ymax:.1f}")

    if gyro_results:
        self.log_progress("计算陀螺仪全局最小和最大值...")
        gyro_all_Sxx = np.concatenate([r['Sxx_log'].ravel() for r in gyro_results])
        gyro_vmin, gyro_vmax = np.percentile(gyro_all_Sxx, [5, 95])

        # Shared DC-trace y-range.
        gyro_dc_ymin, gyro_dc_ymax = self.robust_dc_ylim(gyro_results)
        self.log_progress(f"陀螺仪 DC (dB) 范围: {gyro_dc_ymin:.1f} 到 {gyro_dc_ymax:.1f}")

    # ========= Accelerometer rainfall spectrogram =========
    if acc_results:
        self._plot_single_spectrogram(acc_results, acc_vmin, acc_vmax, acc_dc_ymin, acc_dc_ymax,
                                      '加速度', 'acc_rainfall_spectrogram')
        self.log_progress(f"加速度功率谱密度范围: {acc_vmin:.1f} dB 到 {acc_vmax:.1f} dB")

    # ========= Gyroscope rainfall spectrogram =========
    if gyro_results:
        self._plot_single_spectrogram(gyro_results, gyro_vmin, gyro_vmax, gyro_dc_ymin, gyro_dc_ymax,
                                      '角速度', 'gyro_rainfall_spectrogram')
        self.log_progress(f"陀螺仪功率谱密度范围: {gyro_vmin:.1f} dB 到 {gyro_vmax:.1f} dB")

    total_time = time.time() - start_time
    self.log_progress(f"频谱雨点图生成完成,总耗时 {total_time:.2f}秒")
|
||||
|
||||
def _plot_single_spectrogram(self, results, vmin, vmax, dc_ymin, dc_ymax, title_prefix, filename_prefix):
    """Draw one rainfall spectrogram figure (one row per axis) and save it.

    Each row: a scatter of (time, frequency) points colored by log-PSD,
    a twin y-axis carrying the DC (0 Hz) trace in dB, and a dedicated
    colorbar column (explicit gridspec column avoids the colorbar layout
    problem the docstring of the caller mentions).
    vmin/vmax and dc_ymin/dc_ymax are shared across rows by the caller.
    """
    rows = len(results)
    fig = plt.figure(constrained_layout=True, figsize=(14, 4 * rows), dpi=150)
    # Two columns: wide plot + narrow colorbar.
    gs = fig.add_gridspec(nrows=rows, ncols=2, width_ratios=[22, 1], wspace=0.05, hspace=0.12)

    axes_main = []
    axes_cbar = []
    for i in range(rows):
        axes_main.append(fig.add_subplot(gs[i, 0]))
        axes_cbar.append(fig.add_subplot(gs[i, 1]))

    for i, result in enumerate(results):
        ax = axes_main[i]
        cax = axes_cbar[i]

        # Scatter over the full (t, f) grid; Sxx_log is (f, t) so it is
        # transposed to match the repeat/tile ordering.
        sc = ax.scatter(
            np.repeat(result['t'], len(result['f'])),
            np.tile(result['f'], len(result['t'])),
            c=result['Sxx_log'].T.ravel(),
            cmap='jet',
            s=3,
            alpha=0.7,
            vmin=vmin,
            vmax=vmax,
            rasterized=True  # keep file size sane for dense scatters
        )

        ax.set_title(f'{title_prefix}频谱雨点图 - {result["axis"][-1]}(右侧为DC分量 dB)', fontsize=10)
        ax.set_xlabel('时间 (s)', fontsize=9)
        ax.set_ylabel('频率 (Hz)', fontsize=9)
        ax.set_ylim(0, self.sampling_rate / 2)  # up to Nyquist
        ax.grid(True, linestyle=':', alpha=0.4)

        # Secondary y-axis: DC component trace in dB.
        ax2 = ax.twinx()
        ax2.plot(result['t'], result['dc_log'], color='black', linewidth=1.2, alpha=0.85, label='DC (dB)')
        ax2.set_ylabel('直流分量 (dB)', fontsize=9, color='black')
        ax2.set_ylim(dc_ymin, dc_ymax)
        ax2.tick_params(axis='y', labelcolor='black')
        ax2.yaxis.set_major_locator(MaxNLocator(nbins=6))
        ax2.grid(False)
        ax2.legend(loc='upper right', fontsize=8, framealpha=0.5)

        # Per-row colorbar in its dedicated gridspec cell.
        cbar = fig.colorbar(sc, cax=cax)
        cbar.set_label('功率谱密度 (dB)', fontsize=9)
        cax.tick_params(labelsize=8)

    output_path = os.path.join(self.output_dir, f'{filename_prefix}_{self.timestamp}.png')
    plt.savefig(output_path, bbox_inches='tight', dpi=150)
    plt.close(fig)
    self.log_progress(f"{title_prefix}频谱雨点图已保存为 {output_path}")
|
||||
|
||||
def run_analysis(self):
    """Run the full pipeline: load data, time-series plot, spectrograms.

    Returns True on success; False when the file was skipped (load_data
    raises ValueError for files without IMU columns) or any other error
    occurred.
    """
    try:
        self.log_progress("开始数据分析流程", "INFO")
        t0 = time.time()

        # Pipeline stages, in order.
        self.load_data()
        self.plot_time_series()
        self.plot_rainfall_spectrograms()

        self.log_progress(f"分析完成,总耗时 {time.time() - t0:.2f}秒", "SUCCESS")
        self.log_progress(f"所有输出文件已保存到: {self.output_dir}", "INFO")
        return True

    except ValueError as e:
        # load_data signals "no IMU columns" this way: skip, don't fail.
        self.log_progress(f"跳过文件: {str(e)}", "WARNING")
        return False
    except Exception as e:
        self.log_progress(f"分析过程中出现错误: {str(e)}", "ERROR")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
def process_single_file(file_path):
    """Run the full analysis for one CSV file (worker-process entry point).

    Returns a (file_path, ok, message) tuple so the parent process can
    report per-file status without sharing state.
    """
    try:
        print(f"{Fore.BLUE}开始处理文件: {os.path.basename(file_path)}")
        ok = IMUDataAnalyzer(file_path).run_analysis()
        message = "处理成功" if ok else "文件不包含IMU数据,已跳过"
        return (file_path, ok, message)
    except Exception as e:
        # Report the failure instead of killing the worker.
        return (file_path, False, str(e))
|
||||
|
||||
|
||||
def main():
    """Batch entry point: find IMU CSV files under a user-supplied path
    and process them in parallel, printing per-file status and a summary."""
    print("=" * 60)
    print(f"{Fore.CYAN}IMU数据频谱分析工具 - 多文件批量处理")
    print("=" * 60)

    # Ask the user for a directory (or a single file) path.
    print(f"{Fore.WHITE}请输入包含CSV文件的目录路径: ")
    input_path = input("> ").strip()

    if not os.path.exists(input_path):
        print(f"{Fore.RED}错误: 路径 '{input_path}' 不存在!")
        return

    # Collect CSV files whose path contains "imu" (case-insensitive).
    if os.path.isdir(input_path):
        # One recursive glob, then filter on the "imu" substring.
        all_csv_files = glob.glob(os.path.join(input_path, "**", "*.csv"), recursive=True)
        csv_files = [f for f in all_csv_files if re.search(r'imu', f, re.IGNORECASE)]
        csv_files = list(set(csv_files))  # de-duplicate
        csv_files.sort()
    else:
        # Single file: accept it only if the path mentions "imu".
        if re.search(r'imu', input_path, re.IGNORECASE):
            csv_files = [input_path]
        else:
            csv_files = []

    if not csv_files:
        print(f"{Fore.YELLOW}警告: 未找到包含'imu'的CSV文件")
        return

    print(f"{Fore.GREEN}找到 {len(csv_files)} 个IMU数据文件:")
    for i, file in enumerate(csv_files, 1):
        print(f"  {i}. {os.path.basename(file)}")

    # Fan files out to worker processes (avoids matplotlib thread issues).
    # NOTE(review): the message below says "threads" but a
    # ProcessPoolExecutor is used — wording only, behavior is processes.
    print(f"\n{Fore.CYAN}开始多线程处理文件 (使用 {min(len(csv_files), cpu_count())} 个线程)...")

    success_count = 0
    skipped_count = 0
    failed_files = []

    # ProcessPoolExecutor (not ThreadPoolExecutor) for true isolation.
    with ProcessPoolExecutor(max_workers=min(len(csv_files), cpu_count())) as executor:
        # Submit everything up front; map future -> file for reporting.
        future_to_file = {executor.submit(process_single_file, file): file for file in csv_files}

        # Report each file as its worker finishes.
        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                result = future.result()
                file_path, success, message = result
                if success:
                    print(f"{Fore.GREEN}✓ 完成: {os.path.basename(file_path)}")
                    success_count += 1
                else:
                    # Workers encode "skipped" in the message text.
                    if "跳过" in message:
                        print(f"{Fore.YELLOW}↷ 跳过: {os.path.basename(file_path)} - {message}")
                        skipped_count += 1
                    else:
                        print(f"{Fore.RED}✗ 失败: {os.path.basename(file_path)} - {message}")
                        failed_files.append((file_path, message))
            except Exception as e:
                # future.result() re-raises worker crashes here.
                print(f"{Fore.RED}✗ 异常: {os.path.basename(file_path)} - {str(e)}")
                failed_files.append((file_path, str(e)))

    # Final summary.
    print(f"\n{Fore.CYAN}处理完成统计:")
    print(f"{Fore.GREEN}成功: {success_count} 个文件")
    print(f"{Fore.YELLOW}跳过: {skipped_count} 个文件(不包含IMU数据)")
    print(f"{Fore.RED}失败: {len(failed_files)} 个文件")

    if failed_files:
        print(f"\n{Fore.YELLOW}失败文件详情:")
        for file, error in failed_files:
            print(f"  {os.path.basename(file)}: {error}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print(f"\n{Fore.YELLOW}用户中断程序执行")
|
||||
except Exception as e:
|
||||
print(f"{Fore.RED}程序运行出错: {str(e)}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
648
FFT_IMU/FFT_IMU_dc_v1.py
Normal file
648
FFT_IMU/FFT_IMU_dc_v1.py
Normal file
@@ -0,0 +1,648 @@
|
||||
import pandas as pd
import numpy as np
import matplotlib

matplotlib.use('Agg')  # headless backend: figures are saved, never shown
import matplotlib.pyplot as plt
from scipy import signal
import os
import glob
from datetime import datetime
import time
from multiprocessing import Pool, cpu_count
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
import re
from colorama import Fore, Style, init
from concurrent.futures import ProcessPoolExecutor, as_completed
import warnings
import threading

# Initialize colorama (auto-reset color codes after each print).
init(autoreset=True)

# Silence known-noisy matplotlib warnings.
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
warnings.filterwarnings("ignore", category=FutureWarning, module="matplotlib")

# Locks that keep file operations and log output thread-safe.
file_lock = threading.Lock()
log_lock = threading.Lock()
|
||||
|
||||
|
||||
class IMUDataAnalyzer:
|
||||
def __init__(self, file_path):
    """Set up the analyzer for one CSV file.

    Infers the data type and a default sampling rate from the file name,
    and creates a timestamped output directory next to the input file.
    Note: this also mutates global matplotlib rcParams.
    """
    self.file_path = file_path
    self.data = None             # DataFrame, filled by load_data()
    self.sampling_rate = None    # estimated by estimate_sampling_rate()
    self.fig_size = (15, 10)

    # Infer data type / default sampling rate from the file name:
    # "calib" files are slow (5 Hz), "raw" files are fast (1000 Hz).
    file_name = os.path.basename(file_path).lower()
    if 'calib' in file_name:
        self.data_type = 'calib'
        self.default_sampling_rate = 5
    elif 'raw' in file_name:
        self.data_type = 'raw'
        self.default_sampling_rate = 1000
    else:
        self.data_type = 'unknown'
        self.default_sampling_rate = 5

    # Resolve input paths and build a per-run timestamp.
    file_dir = os.path.dirname(os.path.abspath(file_path))
    file_base_name = os.path.splitext(os.path.basename(file_path))[0]
    self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Output directory "<input name>_output_<timestamp>" next to the input.
    self.output_dir = os.path.join(file_dir, f"{file_base_name}_output_{self.timestamp}")

    # Module lock keeps concurrent workers from racing on mkdir.
    with file_lock:
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
            self.log_progress(f"创建输出目录:{self.output_dir}", "INFO")

    # Fonts: CJK-capable families first so Chinese labels render.
    plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'Arial']
    plt.rcParams['axes.unicode_minus'] = False

    # Compatibility settings to avoid layout-engine conflicts when
    # individual figures opt into constrained layout.
    plt.rcParams['figure.constrained_layout.use'] = False
    plt.rcParams['figure.constrained_layout.h_pad'] = 0.02
    plt.rcParams['figure.constrained_layout.w_pad'] = 0.02
    plt.rcParams['figure.constrained_layout.hspace'] = 0.02
    plt.rcParams['figure.constrained_layout.wspace'] = 0.02

    self.log_progress(f"处理文件:{self.file_path}", "INFO")
    self.log_progress(f"数据类型:{self.data_type}", "INFO")
    self.log_progress(f"输出路径:{self.output_dir}", "INFO")
|
||||
|
||||
def log_progress(self, message, level="INFO"):
|
||||
"""带颜色和级别的日志输出(线程安全)"""
|
||||
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
with log_lock:
|
||||
if level == "INFO":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.GREEN}{message}")
|
||||
elif level == "WARNING":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.YELLOW}警告: {message}")
|
||||
elif level == "ERROR":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.RED}错误: {message}")
|
||||
elif level == "SUCCESS":
|
||||
print(f"{Fore.CYAN}[{timestamp}] {Fore.GREEN}✓ {message}")
|
||||
else:
|
||||
print(f"{Fore.CYAN}[{timestamp}] {message}")
|
||||
|
||||
def check_imu_columns_in_file(self):
    """Cheap pre-check: does the CSV header line mention 'imu'?

    Reads only the first line of the file. Returns True on a
    case-insensitive match; logs and returns False on a miss or on any
    read error.
    """
    try:
        # Only the header line is needed for this check.
        with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
            first_line = f.readline().strip()

        if re.search(r'imu', first_line, re.IGNORECASE):
            return True

        self.log_progress(f"文件头部不包含'imu'关键词,跳过处理,first_line {first_line}", "WARNING")
        return False

    except Exception as e:
        self.log_progress(f"检查文件头部时出错: {str(e)}", "ERROR")
        return False
|
||||
|
||||
def detect_imu_columns(self):
|
||||
"""自动检测IMU数据列"""
|
||||
all_columns = self.data.columns.tolist()
|
||||
|
||||
# 查找imu前缀(如imu1, imu2等)
|
||||
imu_prefixes = set()
|
||||
for col in all_columns:
|
||||
match = re.match(r'^(imu\d+)_', col.lower())
|
||||
if match:
|
||||
imu_prefixes.add(match.group(1))
|
||||
|
||||
if not imu_prefixes:
|
||||
self.log_progress("未检测到IMU数据列,尝试使用默认列名", "WARNING")
|
||||
# 尝试使用常见列名
|
||||
self.acc_columns = ['imu1_acc_x', 'imu1_acc_y', 'imu1_acc_z']
|
||||
self.gyro_columns = ['imu1_gyro_x', 'imu1_gyro_y', 'imu1_gyro_z']
|
||||
self.temp_columns = ['imu1_temp']
|
||||
return
|
||||
|
||||
# 使用第一个检测到的IMU前缀
|
||||
imu_prefix = list(imu_prefixes)[0]
|
||||
self.log_progress(f"检测到IMU前缀: {imu_prefix}", "INFO")
|
||||
|
||||
# 查找加速度计列
|
||||
self.acc_columns = [col for col in all_columns
|
||||
if col.lower().startswith(f"{imu_prefix}_acc") and
|
||||
any(axis in col.lower() for axis in ['_x', '_y', '_z'])]
|
||||
|
||||
# 查找陀螺仪列
|
||||
self.gyro_columns = [col for col in all_columns
|
||||
if col.lower().startswith(f"{imu_prefix}_gyro") and
|
||||
any(axis in col.lower() for axis in ['_x', '_y', '_z'])]
|
||||
|
||||
# 查找温度列
|
||||
self.temp_columns = [col for col in all_columns
|
||||
if col.lower().startswith(f"{imu_prefix}_temp")]
|
||||
|
||||
# 如果没有找到温度列,尝试其他常见名称
|
||||
if not self.temp_columns:
|
||||
self.temp_columns = [col for col in all_columns
|
||||
if any(name in col.lower() for name in ['temp', 'temperature'])]
|
||||
|
||||
self.log_progress(f"加速度计列: {self.acc_columns}", "INFO")
|
||||
self.log_progress(f"陀螺仪列: {self.gyro_columns}", "INFO")
|
||||
self.log_progress(f"温度列: {self.temp_columns}", "INFO")
|
||||
|
||||
def estimate_sampling_rate(self):
|
||||
"""估计实际采样率"""
|
||||
if 'time' in self.data.columns and len(self.data) > 10:
|
||||
time_diff = np.diff(self.data['time'].values)
|
||||
valid_diffs = time_diff[(time_diff > 0) & (time_diff < 10)] # 排除异常值
|
||||
if len(valid_diffs) > 0:
|
||||
estimated_rate = 1.0 / np.median(valid_diffs)
|
||||
self.log_progress(f"根据时间戳估计的采样率: {estimated_rate:.2f} Hz")
|
||||
return estimated_rate
|
||||
|
||||
# 如果没有时间列或无法估计,使用基于文件名的默认值
|
||||
self.log_progress(f"使用基于文件名的默认采样率: {self.default_sampling_rate} Hz")
|
||||
return self.default_sampling_rate
|
||||
|
||||
def load_data(self):
|
||||
"""加载并预处理数据"""
|
||||
self.log_progress("开始加载数据...")
|
||||
start_time = time.time()
|
||||
|
||||
# 首先检查文件是否包含IMU数据
|
||||
if not self.check_imu_columns_in_file():
|
||||
raise ValueError("文件不包含IMU数据列,跳过处理")
|
||||
|
||||
# 使用锁确保文件读取的线程安全
|
||||
with file_lock:
|
||||
self.data = pd.read_csv(self.file_path)
|
||||
|
||||
self.log_progress(f"数据加载完成,共 {len(self.data)} 行,耗时 {time.time() - start_time:.2f}秒")
|
||||
|
||||
# 检测IMU数据列
|
||||
self.detect_imu_columns()
|
||||
|
||||
# 估计采样率
|
||||
self.sampling_rate = self.estimate_sampling_rate()
|
||||
|
||||
# 创建时间序列并处理异常时间值
|
||||
if 'time' in self.data.columns:
|
||||
valid_time_mask = (self.data['time'] > 0) & (self.data['time'] < 1e6)
|
||||
self.data = self.data[valid_time_mask].copy()
|
||||
self.data['time'] = np.arange(len(self.data)) / self.sampling_rate
|
||||
else:
|
||||
# 如果没有时间列,创建基于采样率的时间序列
|
||||
self.data['time'] = np.arange(len(self.data)) / self.sampling_rate
|
||||
|
||||
def remove_dc(self, signal_data):
|
||||
"""不移除直流分量(保留以在频谱中显示 DC)"""
|
||||
return signal_data
|
||||
|
||||
# def compute_spectrogram(self, signal_data):
|
||||
# """计算频谱图(保留直流分量)"""
|
||||
# # 保留直流分量
|
||||
# signal_data = self.remove_dc(signal_data)
|
||||
#
|
||||
# # 自适应窗口大小 - 根据采样率调整
|
||||
# if self.sampling_rate <= 10: # 低采样率
|
||||
# nperseg = min(64, max(16, len(signal_data) // 4))
|
||||
# else: # 高采样率
|
||||
# nperseg = min(1024, max(64, len(signal_data) // 8))
|
||||
#
|
||||
# noverlap = nperseg // 2
|
||||
#
|
||||
# f, t, Sxx = signal.spectrogram(
|
||||
# signal_data,
|
||||
# fs=self.sampling_rate,
|
||||
# window='hann',
|
||||
# nperseg=nperseg,
|
||||
# noverlap=noverlap,
|
||||
# scaling='density',
|
||||
# detrend=False, # 保留直流
|
||||
# mode='psd' # 更高效的模式
|
||||
# )
|
||||
# return f, t, Sxx
|
||||
|
||||
def compute_spectrogram(self, signal_data):
|
||||
"""计算频谱图(保留直流分量),优化频谱分辨率和减少颗粒感"""
|
||||
# 保留直流分量
|
||||
signal_data = self.remove_dc(signal_data)
|
||||
|
||||
# 数据长度
|
||||
n_samples = len(signal_data)
|
||||
|
||||
# 根据采样率和数据长度自适应选择参数
|
||||
if self.sampling_rate <= 10: # 低采样率(5Hz)
|
||||
# 对于低采样率,使用较长的窗口以获得更好的频率分辨率
|
||||
nperseg = min(256, max(64, n_samples // 2))
|
||||
noverlap = int(nperseg * 0.75) # 增加重叠比例
|
||||
|
||||
else: # 高采样率(1000Hz)
|
||||
# 对于高采样率,平衡时间分辨率和频率分辨率
|
||||
if n_samples < 10000: # 较短的数据
|
||||
nperseg = min(512, max(256, n_samples // 4))
|
||||
else: # 较长的数据
|
||||
nperseg = min(1024, max(512, n_samples // 8))
|
||||
|
||||
noverlap = int(nperseg * 0.66) # 适中的重叠比例
|
||||
|
||||
# 确保窗口大小合理
|
||||
nperseg = max(16, min(nperseg, n_samples))
|
||||
noverlap = min(noverlap, nperseg - 1)
|
||||
|
||||
# 使用更平滑的窗口函数
|
||||
f, t, Sxx = signal.spectrogram(
|
||||
signal_data,
|
||||
fs=self.sampling_rate,
|
||||
window='hamming', # 使用汉明窗,比汉宁窗更平滑
|
||||
nperseg=nperseg,
|
||||
noverlap=noverlap,
|
||||
scaling='density',
|
||||
# detrend='linear', # 使用线性去趋势,减少低频干扰
|
||||
detrend=False, # 保留直流
|
||||
mode='psd'
|
||||
)
|
||||
|
||||
# 应用平滑处理以减少颗粒感
|
||||
if Sxx.size > 0:
|
||||
# 使用小范围的高斯滤波平滑(可选)
|
||||
from scipy.ndimage import gaussian_filter
|
||||
Sxx_smoothed = gaussian_filter(Sxx, sigma=0.7)
|
||||
return f, t, Sxx_smoothed
|
||||
|
||||
return f, t, Sxx
|
||||
|
||||
def process_signal(self, args):
|
||||
"""并行处理单个信号"""
|
||||
signal_data, axis = args
|
||||
f, t, Sxx = self.compute_spectrogram(signal_data)
|
||||
|
||||
# 防止 log10(0)
|
||||
eps = np.finfo(float).eps
|
||||
Sxx_log = 10 * np.log10(Sxx + eps)
|
||||
|
||||
# 降采样以加速绘图
|
||||
if len(t) > 1000: # 如果时间点太多,进行降采样
|
||||
time_indices = np.linspace(0, len(t) - 1, 1000, dtype=int)
|
||||
freq_indices = np.linspace(0, len(f) - 1, 500, dtype=int)
|
||||
t = t[time_indices]
|
||||
f = f[freq_indices]
|
||||
Sxx_log = Sxx_log[freq_indices, :][:, time_indices]
|
||||
dc_idx = int(np.argmin(np.abs(f - 0.0)))
|
||||
dc_log = Sxx_log[dc_idx, :] # shape: (len(t),)
|
||||
|
||||
# 更健壮的 0 Hz 索引选择
|
||||
zero_idx = np.where(np.isclose(f, 0.0))[0]
|
||||
dc_idx = int(zero_idx[0]) if len(zero_idx) > 0 else int(np.argmin(np.abs(f - 0.0)))
|
||||
dc_log = Sxx_log[dc_idx, :] # 每个时间窗的 0 Hz PSD(dB)
|
||||
|
||||
return {
|
||||
'f': f,
|
||||
't': t,
|
||||
'Sxx_log': Sxx_log,
|
||||
'dc_log': dc_log,
|
||||
'axis': axis
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def robust_dc_ylim(results, p_low=5, p_high=95, pad_ratio=0.05, fallback=(0.0, 1.0)):
|
||||
"""
|
||||
计算统一 DC 纵轴范围(分位数 + 少许边距),并过滤 inf/NaN
|
||||
"""
|
||||
if not results:
|
||||
return fallback
|
||||
dc_all = np.concatenate([r['dc_log'].ravel() for r in results])
|
||||
dc_all = dc_all[np.isfinite(dc_all)]
|
||||
if dc_all.size == 0:
|
||||
return fallback
|
||||
lo, hi = np.percentile(dc_all, [p_low, p_high])
|
||||
span = max(1e-9, hi - lo)
|
||||
lo -= span * pad_ratio
|
||||
hi += span * pad_ratio
|
||||
return lo, hi
|
||||
|
||||
def plot_time_series(self):
|
||||
"""绘制时间序列图"""
|
||||
self.log_progress("开始绘制时间序列图...")
|
||||
start_time = time.time()
|
||||
|
||||
# 确定子图数量
|
||||
n_plots = 1 # 至少有一个加速度图
|
||||
if self.gyro_columns: # 如果有陀螺仪数据
|
||||
n_plots += 1
|
||||
if self.temp_columns: # 如果有温度数据
|
||||
n_plots += 1
|
||||
|
||||
fig, axes = plt.subplots(n_plots, 1, figsize=(12, 3 * n_plots), dpi=120)
|
||||
if n_plots == 1:
|
||||
axes = [axes] # 确保axes是列表
|
||||
|
||||
plot_idx = 0
|
||||
|
||||
# 加速度计数据
|
||||
if self.acc_columns:
|
||||
ax = axes[plot_idx]
|
||||
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
|
||||
labels = ['X', 'Y', 'Z']
|
||||
for i, col in enumerate(self.acc_columns):
|
||||
if i < 3: # 只绘制前三个轴
|
||||
ax.plot(self.data['time'], self.data[col],
|
||||
label=labels[i], color=colors[i], linewidth=1.0, alpha=0.8)
|
||||
ax.set_title('加速度时间序列', fontsize=12)
|
||||
ax.set_ylabel('加速度 (g)', fontsize=10)
|
||||
ax.legend(loc='upper right', fontsize=8, framealpha=0.5)
|
||||
ax.grid(True, linestyle=':', alpha=0.5)
|
||||
ax.set_xlim(0, self.data['time'].max())
|
||||
plot_idx += 1
|
||||
|
||||
# 陀螺仪数据(如果有)
|
||||
if self.gyro_columns and plot_idx < n_plots:
|
||||
ax = axes[plot_idx]
|
||||
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
|
||||
labels = ['X', 'Y', 'Z']
|
||||
for i, col in enumerate(self.gyro_columns):
|
||||
if i < 3: # 只绘制前三个轴
|
||||
ax.plot(self.data['time'], self.data[col],
|
||||
label=labels[i], color=colors[i], linewidth=1.0, alpha=0.8)
|
||||
ax.set_title('陀螺仪时间序列', fontsize=12)
|
||||
ax.set_ylabel('角速度 (deg/s)', fontsize=10)
|
||||
ax.legend(loc='upper left', fontsize=8, framealpha=0.5)
|
||||
ax.grid(True, linestyle=':', alpha=0.5)
|
||||
ax.set_xlim(0, self.data['time'].max())
|
||||
plot_idx += 1
|
||||
|
||||
# 温度数据(如果有)
|
||||
if self.temp_columns and plot_idx < n_plots:
|
||||
ax = axes[plot_idx]
|
||||
ax.plot(self.data['time'], self.data[self.temp_columns[0]],
|
||||
label='温度', color='#9467bd', linewidth=1.0, alpha=0.8)
|
||||
ax.set_title('温度时间序列', fontsize=12)
|
||||
ax.set_xlabel('时间 (s)', fontsize=10)
|
||||
ax.set_ylabel('温度 (°C)', fontsize=10)
|
||||
ax.legend(loc='upper right', fontsize=8, framealpha=0.5)
|
||||
ax.grid(True, linestyle=':', alpha=0.5)
|
||||
ax.set_xlim(0, self.data['time'].max())
|
||||
|
||||
plt.tight_layout()
|
||||
output_path = os.path.join(self.output_dir, f'time_series_{self.timestamp}.png')
|
||||
plt.savefig(output_path, bbox_inches='tight', dpi=150)
|
||||
plt.close(fig)
|
||||
self.log_progress(f"时间序列图已保存: {output_path}")
|
||||
self.log_progress(f"时间序列图已保存为 {output_path},耗时 {time.time() - start_time:.2f}秒")
|
||||
|
||||
def plot_rainfall_spectrograms(self):
    """Draw all "rainfall" spectrogram figures in parallel (fixes the colorbar layout issue).

    Builds (series, label) pairs for up to three accelerometer axes and up to
    three gyroscope axes, runs ``self.process_signal`` over them in a process
    pool, then renders one figure per sensor type with a shared color scale.
    """
    self.log_progress("开始并行绘制频谱雨点图...")
    start_time = time.time()

    # Prepare accelerometer signals (first three axes only: X/Y/Z).
    self.log_progress("准备加速度计数据...")
    acc_signals = [(self.data[col], f'Acc {["X", "Y", "Z"][i]}')
                   for i, col in enumerate(self.acc_columns) if i < 3]  # first three axes only

    # Prepare gyroscope signals, if gyro columns were detected.
    gyro_signals = []
    if self.gyro_columns:
        self.log_progress("准备陀螺仪数据...")
        gyro_signals = [(self.data[col], f'Gyro {["X", "Y", "Z"][i]}')
                        for i, col in enumerate(self.gyro_columns) if i < 3]  # first three axes only

    # Nothing usable - bail out early.
    if not acc_signals and not gyro_signals:
        self.log_progress("警告: 没有有效的数据列可供处理", "WARNING")
        return

    # Processes (not threads) to avoid matplotlib thread-safety conflicts.
    self.log_progress("使用多进程并行处理...")
    all_signals = acc_signals + gyro_signals
    with Pool(processes=min(len(all_signals), cpu_count())) as pool:
        results = pool.map(self.process_signal, all_signals)

    # Split the pooled results back into accelerometer / gyroscope groups
    # using the label prefix attached above.
    self.log_progress("分离结果...")
    acc_results = [r for r in results if r['axis'].startswith('Acc')]
    gyro_results = [r for r in results if r['axis'].startswith('Gyro')]

    # Shared color scale from the 5%-95% percentiles across all axes.
    if acc_results:
        self.log_progress("计算加速度计全局最小和最大值...")
        acc_all_Sxx = np.concatenate([r['Sxx_log'].ravel() for r in acc_results])
        acc_vmin, acc_vmax = np.percentile(acc_all_Sxx, [5, 95])

        # Shared Y-axis range for the DC (dB) overlay curves.
        acc_dc_ymin, acc_dc_ymax = self.robust_dc_ylim(acc_results)
        self.log_progress(f"加速度 DC (dB) 范围: {acc_dc_ymin:.1f} 到 {acc_dc_ymax:.1f}")

    if gyro_results:
        self.log_progress("计算陀螺仪全局最小和最大值...")
        gyro_all_Sxx = np.concatenate([r['Sxx_log'].ravel() for r in gyro_results])
        gyro_vmin, gyro_vmax = np.percentile(gyro_all_Sxx, [5, 95])

        # Shared Y-axis range for the DC (dB) overlay curves.
        gyro_dc_ymin, gyro_dc_ymax = self.robust_dc_ylim(gyro_results)
        self.log_progress(f"陀螺仪 DC (dB) 范围: {gyro_dc_ymin:.1f} 到 {gyro_dc_ymax:.1f}")

    # ========= accelerometer rainfall spectrogram =========
    if acc_results:
        self._plot_single_spectrogram(acc_results, acc_vmin, acc_vmax, acc_dc_ymin, acc_dc_ymax,
                                      '加速度', 'acc_rainfall_spectrogram')
        self.log_progress(f"加速度功率谱密度范围: {acc_vmin:.1f} dB 到 {acc_vmax:.1f} dB")

    # ========= gyroscope rainfall spectrogram =========
    if gyro_results:
        self._plot_single_spectrogram(gyro_results, gyro_vmin, gyro_vmax, gyro_dc_ymin, gyro_dc_ymax,
                                      '角速度', 'gyro_rainfall_spectrogram')
        self.log_progress(f"陀螺仪功率谱密度范围: {gyro_vmin:.1f} dB 到 {gyro_vmax:.1f} dB")

    total_time = time.time() - start_time
    self.log_progress(f"频谱雨点图生成完成,总耗时 {total_time:.2f}秒")
|
||||
|
||||
def _plot_single_spectrogram(self, results, vmin, vmax, dc_ymin, dc_ymax, title_prefix, filename_prefix):
    """Render one rainfall-spectrogram figure (one row per axis) and save it as PNG.

    Args:
        results: per-axis dicts with keys 't', 'f', 'Sxx_log', 'dc_log', 'axis'.
        vmin/vmax: shared color limits for the log-power scatter.
        dc_ymin/dc_ymax: shared Y limits for the DC (dB) overlay.
        title_prefix: Chinese label used in subplot titles (e.g. 加速度).
        filename_prefix: stem of the output PNG file name.
    """
    rows = len(results)
    fig = plt.figure(constrained_layout=True, figsize=(14, 4 * rows), dpi=150)
    # A dedicated narrow column per row hosts the colorbar, which keeps the
    # main axes aligned (the "colorbar layout fix").
    gs = fig.add_gridspec(nrows=rows, ncols=2, width_ratios=[22, 1], wspace=0.05, hspace=0.12)

    axes_main = []
    axes_cbar = []
    for i in range(rows):
        axes_main.append(fig.add_subplot(gs[i, 0]))
        axes_cbar.append(fig.add_subplot(gs[i, 1]))

    for i, result in enumerate(results):
        ax = axes_main[i]
        cax = axes_cbar[i]

        # Scatter of (time, frequency) points colored by log power.
        # rasterized=True keeps the saved file size manageable.
        sc = ax.scatter(
            np.repeat(result['t'], len(result['f'])),
            np.tile(result['f'], len(result['t'])),
            c=result['Sxx_log'].T.ravel(),
            cmap='jet',
            s=3,
            alpha=0.7,
            vmin=vmin,
            vmax=vmax,
            rasterized=True
        )

        ax.set_title(f'{title_prefix}频谱雨点图 - {result["axis"][-1]}(右侧为DC分量 dB)', fontsize=10)
        ax.set_xlabel('时间 (s)', fontsize=9)
        ax.set_ylabel('频率 (Hz)', fontsize=9)
        ax.set_ylim(0, self.sampling_rate / 2)  # up to Nyquist
        ax.grid(True, linestyle=':', alpha=0.4)

        # Secondary y-axis overlays the DC component (dB) over time.
        ax2 = ax.twinx()
        ax2.plot(result['t'], result['dc_log'], color='black', linewidth=1.2, alpha=0.85, label='DC (dB)')
        ax2.set_ylabel('直流分量 (dB)', fontsize=9, color='black')
        ax2.set_ylim(dc_ymin, dc_ymax)
        ax2.tick_params(axis='y', labelcolor='black')
        ax2.yaxis.set_major_locator(MaxNLocator(nbins=6))
        ax2.grid(False)
        ax2.legend(loc='upper right', fontsize=8, framealpha=0.5)

        # Per-row colorbar in its reserved gridspec cell.
        cbar = fig.colorbar(sc, cax=cax)
        cbar.set_label('功率谱密度 (dB)', fontsize=9)
        cax.tick_params(labelsize=8)

    output_path = os.path.join(self.output_dir, f'{filename_prefix}_{self.timestamp}.png')
    plt.savefig(output_path, bbox_inches='tight', dpi=150)
    plt.close(fig)
    self.log_progress(f"{title_prefix}频谱雨点图已保存为 {output_path}")
|
||||
|
||||
def run_analysis(self):
    """Execute the full analysis pipeline: load data, draw the time-series
    plot, then the rainfall spectrograms.

    Returns:
        bool: True when every stage finished; False when the file was
        skipped (no IMU data, signalled via ValueError) or an unexpected
        error occurred.
    """
    try:
        self.log_progress("开始数据分析流程", "INFO")
        start_time = time.time()

        # The three pipeline stages, executed in fixed order.
        for stage in (self.load_data, self.plot_time_series, self.plot_rainfall_spectrograms):
            stage()

        total_time = time.time() - start_time
        self.log_progress(f"分析完成,总耗时 {total_time:.2f}秒", "SUCCESS")
        self.log_progress(f"所有输出文件已保存到: {self.output_dir}", "INFO")
        return True

    except ValueError as e:
        # Files without IMU data raise ValueError - treated as a skip.
        self.log_progress(f"跳过文件: {str(e)}", "WARNING")
        return False
    except Exception as e:
        self.log_progress(f"分析过程中出现错误: {str(e)}", "ERROR")
        import traceback
        traceback.print_exc()
        return False
|
||||
|
||||
|
||||
def process_single_file(file_path):
    """Process one CSV file inside an isolated worker process.

    Returns:
        tuple: (file_path, success_flag, human-readable message).
    """
    try:
        print(f"{Fore.BLUE}开始处理文件: {os.path.basename(file_path)}")
        ok = IMUDataAnalyzer(file_path).run_analysis()
        # run_analysis() returns False when the file holds no IMU data.
        note = "处理成功" if ok else "文件不包含IMU数据,已跳过"
        return (file_path, ok, note)
    except Exception as e:
        return (file_path, False, str(e))
|
||||
|
||||
|
||||
def main():
    """Entry point: batch-process every IMU CSV under a user-supplied path.

    Asks for a directory (or single file), collects all CSV files whose
    path contains "imu" (case-insensitive), then fans them out to a
    process pool and prints a colored summary.
    """
    print("=" * 60)
    print(f"{Fore.CYAN}IMU数据频谱分析工具 - 多文件批量处理")
    print("=" * 60)

    # Ask the user for the input path.
    print(f"{Fore.WHITE}请输入包含CSV文件的目录路径: ")
    input_path = input("> ").strip()

    if not os.path.exists(input_path):
        print(f"{Fore.RED}错误: 路径 '{input_path}' 不存在!")
        return

    # Collect CSV files whose path contains "imu" (case-insensitive).
    if os.path.isdir(input_path):
        # One recursive glob for all CSVs, then filter for "imu".
        all_csv_files = glob.glob(os.path.join(input_path, "**", "*.csv"), recursive=True)
        csv_files = [f for f in all_csv_files if re.search(r'imu', f, re.IGNORECASE)]
        csv_files = list(set(csv_files))  # de-duplicate
        csv_files.sort()
    else:
        # A single file: accept it only if its name contains "imu".
        if re.search(r'imu', input_path, re.IGNORECASE):
            csv_files = [input_path]
        else:
            csv_files = []

    if not csv_files:
        print(f"{Fore.YELLOW}警告: 未找到包含'imu'的CSV文件")
        return

    print(f"{Fore.GREEN}找到 {len(csv_files)} 个IMU数据文件:")
    for i, file in enumerate(csv_files, 1):
        print(f" {i}. {os.path.basename(file)}")

    # Processes, not threads, to avoid matplotlib threading conflicts.
    # NOTE(review): the user-facing message below says "多线程/线程"
    # (threads) although a *process* pool is used - consider rewording.
    print(f"\n{Fore.CYAN}开始多线程处理文件 (使用 {min(len(csv_files), cpu_count())} 个线程)...")

    success_count = 0
    skipped_count = 0
    failed_files = []

    # ProcessPoolExecutor instead of ThreadPoolExecutor (see note above).
    with ProcessPoolExecutor(max_workers=min(len(csv_files), cpu_count())) as executor:
        # Submit every file as an independent task.
        future_to_file = {executor.submit(process_single_file, file): file for file in csv_files}

        # Report each task as it completes (not in submit order).
        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                result = future.result()
                file_path, success, message = result
                if success:
                    print(f"{Fore.GREEN}✓ 完成: {os.path.basename(file_path)}")
                    success_count += 1
                else:
                    # Workers encode "skip" vs "failure" in the message text.
                    if "跳过" in message:
                        print(f"{Fore.YELLOW}↷ 跳过: {os.path.basename(file_path)} - {message}")
                        skipped_count += 1
                    else:
                        print(f"{Fore.RED}✗ 失败: {os.path.basename(file_path)} - {message}")
                        failed_files.append((file_path, message))
            except Exception as e:
                print(f"{Fore.RED}✗ 异常: {os.path.basename(file_path)} - {str(e)}")
                failed_files.append((file_path, str(e)))

    # Final summary.
    print(f"\n{Fore.CYAN}处理完成统计:")
    print(f"{Fore.GREEN}成功: {success_count} 个文件")
    print(f"{Fore.YELLOW}跳过: {skipped_count} 个文件(不包含IMU数据)")
    print(f"{Fore.RED}失败: {len(failed_files)} 个文件")

    if failed_files:
        print(f"\n{Fore.YELLOW}失败文件详情:")
        for file, error in failed_files:
            print(f" {os.path.basename(file)}: {error}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Top-level guard: run main() and surface errors in color instead of a
    # bare traceback; Ctrl+C gets its own friendly message.
    try:
        main()
    except KeyboardInterrupt:
        print(f"\n{Fore.YELLOW}用户中断程序执行")
    except Exception as exc:
        import traceback
        print(f"{Fore.RED}程序运行出错: {exc}")
        traceback.print_exc()
|
||||
6
ICCIDupdata/.gitignore
vendored
Normal file
6
ICCIDupdata/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
90
ICCIDupdata/ICCIDtest_V1.py
Normal file
90
ICCIDupdata/ICCIDtest_V1.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import requests
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
import json
|
||||
|
||||
def generate_sign(system_id, request_info, request_time, secret_key):
    """Build the MD5 request signature.

    The three request parameters are sorted by key, joined as a
    ``key=value`` query string, suffixed with the secret key and hashed.

    Returns:
        str: lowercase hexadecimal MD5 digest.
    """
    fields = {
        'requestInfo': request_info,
        'requestTime': request_time,
        'systemId': system_id
    }
    # Deterministic ordering: keys sorted lexicographically.
    query = '&'.join(f"{key}={value}" for key, value in sorted(fields.items()))
    return hashlib.md5((query + secret_key).encode()).hexdigest()
|
||||
|
||||
|
||||
def test_navp_interface():
    """Smoke-test the NAVP info-upload endpoint on the pre-release gateway.

    Signs a fixed sample payload, POSTs it form-encoded and prints the
    response. Returns True when the HTTP status is 200, False otherwise.
    """
    # Secret key issued by HT for this system.
    # NOTE(review): hard-coded credential checked into source - move it to
    # configuration / environment before wider use.
    secret_key = "aqwec3be422c22a752c22"

    # url = "https://flow-gateway.pre.aeroht.com/server/oem/navp/infoUpload"
    url = "https://flow-gateway.pre.aeroht.com/server/oem/nav/infoUpload"

    # Sample test payload (JSON passed as an opaque string).
    request_info = '{"iccid":"navp345678112300001","partsNo":"F34410001X3K-00-02","hVer":"F34410001X3K-00-02","sVer":"F34410001X3K0P001","network":"AG35CEVFMR12A02T4G&864169079532089","soc":"NA","sn":"F34410001X3K00024013683HJ00170"}'
    # system_id = "navpFactory"
    system_id = "diufactory"
    request_time = time.strftime("%Y-%m-%d %H:%M:%S")
    print(f"request_time:{request_time}")
    # request_time = time.strftime("%Y/%m/%d %H:%M")

    # Sign the three parameters with the shared secret.
    sign = generate_sign(system_id, request_info, request_time, secret_key)

    data = {
        "systemId": system_id,
        "requestInfo": request_info,
        "requestTime": request_time,
        "sign": sign
    }

    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }

    try:
        # response = requests.post(url, data=data, headers=headers, timeout=30)
        print(f"data|requestInfo:{data['requestInfo']}")
        response = requests.request("POST",url, data=data, headers=headers, timeout=30)

        if(response.status_code == 200) :
            print(f"NAVP 接口测试 OK")
        else :
            print(f"NAVP 接口测试 NG")

        print(f"NAVP接口响应状态码: {response.status_code}")
        print(f"NAVP接口响应内容: {response.text}")

        return response.status_code == 200
    except Exception as e:
        print(f"NAVP接口请求失败: {e}")
        return False
|
||||
|
||||
|
||||
def test_navs_interface():
    """Smoke-test the NAVS info-upload endpoint (mirrors test_navp_interface;
    only systemId and url differ). Prints the raw response text.

    NOTE(review): the request parameters, sign and session cookie are baked
    into the URL/headers below and will expire - regenerate them before
    reusing this test. Consider building the request like
    test_navp_interface does instead of hard-coding it.
    """
    # NAVS test (similar to NAVP, only systemId and url change)
    # logic mirrors test_navp_interface
    # pass
    url = "https://flow-gateway.pre.aeroht.com/server/oem/nav/infoUpload?requestInfo=%7B%22iccid%22:%22navp345678112300001%22,%22partsNo%22:%22parts111%22,%22hVer%22:%22hVer_7d98d056c96e22222%22,%22sVer%22:%22sVer_b38651e22222%22,%22soc%22:%22111%22,%22network%22:%222222%22%7D&systemId=diufactory&requestTime=2026-01-14%2017:19:58&sign=f480924ff291e0f98a4fb9fdd0167a3e&appSecret=aqwec3be422c22a752c22"

    payload = {}
    headers = {
        'Cookie': 'cookies=mg3Tr49e6qr2eIHbvmiHp9NXJa56ei5vh4CeDbcRaEH450bqgdWLrHYHIgaZX3A7CXB9l0X3c1i+9D96HFAFjSCIA58vVLNpM2EtDixW67CQVOpinLaIMEcnr4wSqtaHjOvpw+XVvm+nB3LE2C5AH/qpSULCgySiX3ET7BQV0PSZkGUfWs2z6PqLSPa7ta9jr18otqVkK7y2zKdsdc4YkYq2jbZldPXm8cXufRCUqdvXoR2QzMoN+/gu6vBKtSXHSlyaTCC/aay+i64ChV4iNXrKlfHHj9MswdrzAazFvZXoDNMTMW00TEbev9DDbcTXVUdbjxidZM4Qk8xIMcpaR07l1ruHLLd2gmYZKRarBAxhrGXGWvJtm5EV1N0AgO3t9sSWhsyWNKKPijgMmUhYyOzKoxD3cvSZ2vGnI5iojb9W6U+cT3A98W81ENYs3yyrEZamJOAbbwAi+zpcCmxI/wcWq32HXgiYLxJ4pmaNlIlW+h8a4tGfTpxAR/WrG/SPN/HoMPohV1INDkllXkurrijH8ZeAQmF+lVepFfBcC9dPrkB7RBAUF/P0FIqjoAXVf6ULLoUvyHcD92vIPDVT4UPW7XGT7FRxtNoBMXhKJ9fOosn+ofuskmOWS1pQsAe5zY7fM/uE7VRrS/AaUt94hKSTJO0p94dPeRRxMt5zDe+Fe4M+wvE5SKaE++C6ZrSNqsuYq1RhcQS26PR90xvq9+OT3HX1r7vGakFIGNXzW/Gd3+QF7+5oGDQHzc6WjFAsQBs1HkntPcJpsVbE24r6kiGzMxgCNTzreqJXDYmyePETDKj75bb/K5E18Zeo83vF3zk2vVrxxefwbvaGWyeRJJW6sQv8kyaIpyNVPQOhetlpDV4RHVzja869fEIl1zOdNQWkU+7F/gCBfcUS79RIaC5psIDofx28E7TIhfanh41OU+TtBXNXEVYqf/7NDc3q+1pLnenogmFvSSG4qE0iSGUapL9iTaDXjlOyYkS39keVog/AHrVfDYMTzBWeko2YJmpLExUwLeXfwL3xRI41yuuBz2eEuQhyIMwxxQWHkptPFR9Cn6TfjDwYBVIxIzrEBFc6E14VmZQ/zNloS2n66Z45ivuaRpagMaWo7+cqSX0CQvQ8SJQ+5k4i7pnXzhSq8fxiLSa0wIvfrnDlwd7WS6oe0nKqyMInt/iGMqUiAVmrlduHhlrIweHkM/E7pVaURVI38R5WNOOYBgHV8CTUwi2FLwsZbEkD0ElJDhEkeHfWHxmn9XkIVU+XD3/OZp+IRCYBWr3t6+iPJqosp75eWNfST5kCzP/bye+h5vRjuvRdnnMhekyd9MY3yPPbz5JJ42CTrkjyAGIyiycQNI4mFIWB9nlM3hXoywoO+FDd2CFIMvwkdH+GXEvbVNR/il3O94jRS+kF3v/i8WBPDzUSP0aHAQPEAvzyIsxY/6WrOcAuuA7Cy0qeIzjI0Wzjv/QbOEgtHk7kR6+pgptQtVFgU4EldoQFnuZEPPQdbw1OAhxUKfyIuljTKq8FbZ95aHX5fFQ6POzgIgUFYCwVqRHkVn6dwHLkDXcOMhfXvw+5q23k0GGCFUPwFM+6ypZBoKKCRTZ60e0IUOq3afVls/UUgGnDQp4pT/BXhLYhICGH8cZw+sNxHLddehuepi4PI8fq60e+H6RfE7xxk+LRlVNyI0TTi+NuESQr+UzX7GIvVkiiwgQKrUPafqBbDS6L2890tVXt1un1UH5hW9GuE+uftclBWqvGnYZUUrHQ42eAr1c8xvunaTINVU24nBlVFUPeh3x34RsjldTkrYeIkk9v0tz8T7ndWi6qxv/03u9YBlMRcJozgDnovVx/tNH7J0f6j6Sq1RNkhxVvRe6SPAgS3mvz5MLcMLw9pWCTSOf8NVDbSuV5NpOm+f5mhU9u/5tLfXgznJSmu9UW6WWx4PgiPTB0jHELrYnDQiiDRqVDFixaHzPZ6t9CKJp088NXrLamFfOYfd3e2S6xEu7aUHBKR2vnscQfl5awuzWD8uVh3sHcK/N4f2wregqra3YaSgme',
        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    print(response.text)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    print("开始测试预发布环境...")
    # Only the NAVP endpoint is exercised for now; NAVS stays disabled.
    navp_ok = test_navp_interface()
    # navs_result = test_navs_interface()
|
||||
9
IMULinkdata/.gitignore
vendored
Normal file
9
IMULinkdata/.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
|
||||
|
||||
LINLinkData_V2.py
|
||||
252
IMULinkdata/LINLinkData_V1.py
Normal file
252
IMULinkdata/LINLinkData_V1.py
Normal file
@@ -0,0 +1,252 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import argparse
|
||||
|
||||
import re
|
||||
import time
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import openpyxl
|
||||
|
||||
|
||||
class ExcelProcessor:
    """Splits a link-data Excel workbook into one sheet per PartNumber,
    de-duplicating repeated ChildSN rows along the way."""

    def __init__(self, file_path):
        self.file_path = file_path  # path of the source workbook
        self.df = None  # DataFrame populated by load_data()
        self.output_folder = None  # directory of the result file (set by create_output_folder)
        self.output_file = None  # full path of the result file (set by create_output_folder)
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")  # run id embedded in output names
        self.processed_data = {}  # holds processed data (currently unused)
|
||||
|
||||
|
||||
def load_data(self):
    """Load the link-data sheet from the workbook into ``self.df``.

    Picks the first sheet whose name contains "link" (case-insensitive),
    aliases LinkObject -> PartNumber, validates required columns and parses
    linkDate as "m/d/Y h:M:S AM/PM". Returns True on success, False otherwise.
    """
    print(f"正在加载文件: {self.file_path}")
    try:
        # Locate and read the target sheet; report and bail out when absent.
        try:
            # engine='openpyxl' lets pandas read Excel date cells as datetime where possible.
            # self.df = pd.read_excel(self.file_path, sheet_name='LinkdataAll', engine='openpyxl')

            # All sheet names in the workbook.
            sheet_names = pd.ExcelFile(self.file_path, engine='openpyxl').sheet_names
            # First sheet whose name contains 'LINK' (case-insensitive).
            target_sheet = next((sheet for sheet in sheet_names if 'link' in sheet.lower()), None)
            if target_sheet is None:
                raise ValueError(f"未找到包含'LINK'的工作表")

            self.df = pd.read_excel(self.file_path, sheet_name=target_sheet, engine='openpyxl')
        except Exception as e:
            print("提示: 未找到包含'LINK' sheet,请检查文件内容。")
            return False

        # Ensure a PartNumber column exists (LinkObject is an accepted alias).
        if 'PartNumber' not in self.df.columns and 'LinkObject' in self.df.columns:
            self.df['PartNumber'] = self.df['LinkObject']

        # Validate the columns the rest of the pipeline depends on.
        required_cols = ["PartNumber", "ChildSN", "linkDate"]
        missing = [c for c in required_cols if c not in self.df.columns]
        if missing:
            raise ValueError(f"数据表中缺少必要列: {', '.join(missing)}")

        # Parse linkDate as datetime (12-hour clock with AM/PM).
        # pd.to_datetime handles most common formats, e.g. "5/24/2025 6:00:13 PM";
        # add finer cleaning here if the data turns out to be heterogeneous.
        # errors='coerce' turns unparsable values into NaT.
        # self.df['linkDate'] = pd.to_datetime(self.df['linkDate'], errors='coerce')

        self.df['linkDate'] = pd.to_datetime(
            self.df['linkDate'],
            format='%m/%d/%Y %I:%M:%S %p',  # month/day/year, 12-hour clock + AM/PM
            errors='coerce'
        )

        # Report how many dates failed to parse.
        total = len(self.df)
        invalid = int(self.df['linkDate'].isna().sum())
        print(f"文件加载成功,总行数: {total},日期解析失败: {invalid} 行")

        # Remark column filled later by process_duplicates().
        if '备注' not in self.df.columns:
            self.df['备注'] = ''

        return True
    except Exception as e:
        print(f"加载文件失败: {str(e)}")
        return False
|
||||
|
||||
def create_output_folder(self):
|
||||
"""准备输出目录和文件名"""
|
||||
|
||||
# 先去除扩展名,再截取前10个字符
|
||||
# base_name = os.path.splitext(os.path.basename(self.file_path))[0]
|
||||
original_name = os.path.splitext(os.path.basename(self.file_path))[0]
|
||||
|
||||
# base_name = original_name[:10]
|
||||
base_name = original_name[:20]
|
||||
|
||||
output_folder_name = f"{base_name} output_{self.timestamp}"
|
||||
|
||||
# self.output_folder = os.path.join(os.path.dirname(self.file_path), output_folder_name)
|
||||
self.output_folder = os.path.dirname(self.file_path)
|
||||
|
||||
self.output_file = os.path.join(self.output_folder, f"{original_name}_split_by_PartNumber_{self.timestamp}.xlsx")
|
||||
|
||||
if not os.path.exists(self.output_folder):
|
||||
os.makedirs(self.output_folder)
|
||||
print(f"已创建输出文件夹: {self.output_folder}")
|
||||
|
||||
|
||||
def _safe_sheet_name(self, name):
|
||||
"""清理为合法的 Excel sheet 名称(<=31字符,无非法字符)"""
|
||||
# 转为字符串
|
||||
s = str(name)
|
||||
# 替换非法字符:: \ / ? * [ ]
|
||||
s = re.sub(r'[:\\/\?\*\[\]]', '_', s)
|
||||
# 去除首尾空格
|
||||
s = s.strip()
|
||||
# 截断到 31 个字符
|
||||
if len(s) > 31:
|
||||
s = s[:31]
|
||||
# 空名兜底
|
||||
if not s:
|
||||
s = 'Sheet'
|
||||
return s
|
||||
|
||||
def process_data(self):
    """Split ``self.df`` into one sheet per PartNumber and save the workbook.

    Each group is de-duplicated via process_duplicates(), linkDate is
    rendered as "yyyy-mm-dd hh:mm:ss" text, and the group is written to a
    sheet named after its PartNumber (sanitised by _safe_sheet_name).

    Raises:
        ValueError: when load_data() was not called first, or when neither
            a PartNumber nor a LinkObject column exists.
    """
    if self.df is None:
        raise ValueError("数据未加载,请先调用 load_data() 方法")

    # Make sure a PartNumber column exists (LinkObject is an accepted alias).
    if 'PartNumber' not in self.df.columns:
        if 'LinkObject' in self.df.columns:
            self.df['PartNumber'] = self.df['LinkObject']
        else:
            raise ValueError("数据表中既没有PartNumber也没有LinkObject列")

    # (Re)initialise the remark column used for duplicate annotations.
    self.df['备注'] = ''

    # One group per PartNumber; NaN part numbers form their own group.
    grouped = self.df.groupby('PartNumber', dropna=False)
    total_groups = len(grouped)
    print(f"开始处理数据,共 {total_groups} 个分组...")

    # print(f"输出文件信息,self.output_folder:{self.output_folder}")
    print(f"输出文件信息,self.output_file:{self.output_file}")
    output_path = self.output_file

    # Context manager guarantees the workbook is saved/closed even when an
    # exception fires mid-loop (the original left the writer open on error).
    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        for i, (name, group) in enumerate(grouped):
            print(f"正在处理分组 {i + 1}/{total_groups}: {name}")

            # Collapse duplicate ChildSN rows, keeping the newest linkDate.
            group_processed = self.process_duplicates(group)

            # Render linkDate as "yyyy-mm-dd hh:mm:ss" text for output.
            group_out = group_processed.copy()
            group_out['linkDate'] = group_out['linkDate'].apply(
                lambda x: x.strftime('%Y-%m-%d %H:%M:%S') if pd.notnull(x) else ''
            )

            # Write the group to its own (sanitised) sheet.
            safe_name = self._safe_sheet_name(name)
            group_out.to_excel(writer, sheet_name=safe_name, index=False)

    print(f"处理完成! 结果已保存到: {output_path}")
|
||||
|
||||
def process_duplicates(self, group):
    """Collapse duplicated ChildSN rows inside one PartNumber group.

    For every ChildSN that appears more than once, keep only the row with
    the newest linkDate, and record the duplicate count plus the differing
    column values in the '备注' (remark) column of the surviving row.

    Returns the (possibly reduced) group DataFrame.
    """
    # All rows whose ChildSN occurs more than once (every occurrence kept here).
    duplicates = group[group.duplicated('ChildSN', keep=False)]

    if not duplicates.empty:
        print(f" 发现 {len(duplicates)} 行重复数据,正在处理...")

        # Handle each duplicated ChildSN as its own sub-group.
        for child_sn, dup_group in duplicates.groupby('ChildSN'):
            # Newest linkDate first (descending); NaT values sort to the end.
            dup_group = dup_group.sort_values('linkDate', ascending=False)

            # Row that will survive the de-duplication.
            latest_row = dup_group.iloc[0]

            # Collect per-column differences (ChildSN and the remark column excluded).
            diff_info = {}
            for col in dup_group.columns:
                if col in ['ChildSN', '备注']:
                    continue
                unique_values = dup_group[col].unique()
                if len(unique_values) > 1:
                    # linkDate gets explicit formatting; other columns use plain str().
                    if col == 'linkDate':
                        vals = []
                        for v in unique_values:
                            if pd.isna(v):
                                vals.append('')
                            elif isinstance(v, pd.Timestamp):
                                vals.append(v.strftime('%Y-%m-%d %H:%M:%S'))
                            else:
                                vals.append(str(v))
                        diff_info[col] = f"{col}: {', '.join(vals)}"
                    else:
                        diff_info[col] = f"{col}: {', '.join(map(str, unique_values))}"

            # Build the remark text.
            note = f"重复行数: {len(dup_group)}"
            if diff_info:
                note += "; 差异内容: " + "; ".join(diff_info.values())

            # Write the remark onto every row of this ChildSN first; the
            # older rows are dropped right below, so only the newest keeps it.
            group.loc[group['ChildSN'] == child_sn, '备注'] = note

            # Drop all but the newest row.
            drop_indices = dup_group.index[1:]
            group = group.drop(drop_indices)

    return group
|
||||
|
||||
|
||||
def main():
    """Interactive entry point: ask for a workbook path, then split it by PartNumber."""
    print("=== Excel拆分工具 ===")
    # strip('"') removes the quotes Windows' "Copy as path" adds.
    file_path = input("请输入Excel文件路径: ").strip('"')
    if not os.path.exists(file_path):
        print("文件不存在,请检查路径")
        return

    start_time = time.time()

    try:
        # Build the processor for the chosen workbook.
        processor = ExcelProcessor(file_path)

        # Pipeline: load -> prepare output location -> split & save.
        if not processor.load_data():
            return

        processor.create_output_folder()
        processor.process_data()

        print("所有处理已完成!")
    except Exception as e:
        print(f"处理过程中发生错误: {e}")

    end_time = time.time()
    print(f"总耗时: {end_time - start_time:.2f}秒")


if __name__ == "__main__":
    main()
|
||||
20
dataProcess/.gitignore
vendored
Normal file
20
dataProcess/.gitignore
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
/dataProcess_out*
|
||||
*.xls
|
||||
*.xlsx
|
||||
*.csv
|
||||
*.spec
|
||||
|
||||
/temp
|
||||
|
||||
dataProcess_html_V2.py
|
||||
|
||||
dataProcess_sightml_V2.py
|
||||
dataProcess_sightml_V3.py
|
||||
|
||||
dataProcessMerge_V2.py
|
||||
475
dataProcess/dataProcessMerge_V1.py
Normal file
475
dataProcess/dataProcessMerge_V1.py
Normal file
@@ -0,0 +1,475 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from tkinter import filedialog, Tk
|
||||
import logging
|
||||
import datetime
|
||||
# --- 新增导入 ---
|
||||
from colorama import init, Fore, Style
|
||||
import sys
|
||||
|
||||
# 初始化 colorama,autoreset=True 使得每次打印后自动恢复默认颜色
|
||||
init(autoreset=True)
|
||||
|
||||
# --- 自定义日志格式化器 ---
|
||||
class ColoredFormatter(logging.Formatter):
    """Colour console log output according to the record's level."""

    # ANSI colour per level name.
    COLORS = {
        'DEBUG': Fore.CYAN,
        'INFO': Fore.GREEN,
        'WARNING': Fore.YELLOW,
        'ERROR': Fore.RED,
        'CRITICAL': Fore.RED + Style.BRIGHT,
    }

    def format(self, record):
        """Format *record* with level-based colours.

        The original implementation permanently mutated ``record.levelname``
        and ``record.msg``; since a LogRecord is shared between handlers,
        that double-coloured records and leaked ANSI codes into any plain
        (e.g. file) handler. The mutation is now reverted after formatting.
        """
        log_color = self.COLORS.get(record.levelname, '')
        original_levelname = record.levelname
        original_msg = record.msg
        try:
            record.levelname = f"{log_color}{original_levelname}{Style.RESET_ALL}"
            record.msg = f"{log_color}{original_msg}{Style.RESET_ALL}"
            return super().format(record)
        finally:
            # Restore the shared record for any other handler.
            record.levelname = original_levelname
            record.msg = original_msg
|
||||
|
||||
# --- logging configuration ---
# Configure the root logger so every module-level logging.* call below is colored.
logger = logging.getLogger()  # root logger
logger.setLevel(logging.INFO)

# Drop pre-existing handlers (if any) to avoid duplicated output.
if logger.handlers:
    logger.handlers.clear()

# Console handler; sys.stdout keeps log lines ordered with print() output.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)

# Attach the colored formatter defined above.
formatter = ColoredFormatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)

# Register the handler on the root logger.
logger.addHandler(console_handler)
# --- end of logging configuration ---
|
||||
|
||||
|
||||
class DataProcessor:
    """Merges SPI measurement CSV files with a spec (upper/lower limits) file."""

    def __init__(self):
        self.spec_file = None  # path of the spec (limits) CSV, chosen by the user
        self.data_folder = None  # folder containing the measurement CSVs
        self.spec_data = None  # DataFrame with the spec limits (load_spec_data)
        self.data_files = []  # valid measurement CSV paths (scan_data_files)
        self.merged_data = pd.DataFrame()  # accumulated merge result
|
||||
|
||||
def select_spec_file(self):
    """Ask the user to pick the spec (upper/lower limits) CSV via a dialog.

    Returns:
        bool: True when a file was chosen, False otherwise.
    """
    root = Tk()
    root.withdraw()  # hide the empty main window; only the dialog is wanted
    self.spec_file = filedialog.askopenfilename(
        title="选择上限和下限规格要求文件",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")]
    )
    # Release the hidden Tk root (the original leaked one root per call).
    root.destroy()
    if not self.spec_file:
        logging.error("未选择规格文件")
        return False
    logging.info(f"已选择规格文件: {self.spec_file}")
    return True
|
||||
|
||||
def select_data_folder(self):
    """Ask the user to pick the folder holding the measurement CSVs.

    Returns:
        bool: True when a folder was chosen, False otherwise.
    """
    root = Tk()
    root.withdraw()  # hide the empty main window; only the dialog is wanted
    self.data_folder = filedialog.askdirectory(title="选择实际数据文件所在的文件夹")
    # Release the hidden Tk root (the original leaked one root per call).
    root.destroy()
    if not self.data_folder:
        logging.error("未选择数据文件夹")
        return False
    logging.info(f"已选择数据文件夹: {self.data_folder}")
    return True
|
||||
|
||||
def clean_column_names(self, df):
|
||||
"""清理列名,去除前后空格和特殊字符"""
|
||||
df.columns = [col.strip() for col in df.columns]
|
||||
return df
|
||||
|
||||
def load_spec_data(self):
    """Load the spec CSV into ``self.spec_data``; the header sits on line 3.

    Normalises column names, coerces PAD ID to trimmed strings and warns
    about missing limit columns. Returns True on success, False otherwise.
    """
    try:
        # Skip the first two lines; line 3 carries the column names (header=2).
        self.spec_data = pd.read_csv(self.spec_file, header=2)

        # Trim whitespace from the column names.
        self.spec_data = self.clean_column_names(self.spec_data)

        # PAD ID must be a trimmed string so later joins match reliably.
        if 'PAD ID' in self.spec_data.columns:
            self.spec_data['PAD ID'] = self.spec_data['PAD ID'].astype(str).str.strip()

        # Verify the expected limit columns are present.
        required_columns = ["PAD ID", "Component ID", "Vol_Min(%)", "Vol_Max(%)",
                            "Height_Low(mil)", "Height_High(mil)", "Area_Min(%)", "Area_Max(%)"]

        missing_columns = [col for col in required_columns if col not in self.spec_data.columns]
        if missing_columns:
            logging.warning(f"规格文件中缺少以下列: {missing_columns}")
            # Suggest similarly named columns to help the user fix the file.
            for missing_col in missing_columns:
                similar_cols = [col for col in self.spec_data.columns if missing_col.lower() in col.lower()]
                if similar_cols:
                    logging.info(f"可能匹配的列: {similar_cols}")

        # Component ID is propagated into the output, so warn loudly if absent.
        if "Component ID" not in self.spec_data.columns:
            logging.warning("'Component ID' 列在规格文件中缺失,这可能导致输出文件中也缺少该列。")

        logging.info(f"规格数据加载成功,共 {len(self.spec_data)} 行")
        logging.info(f"规格文件列名: {list(self.spec_data.columns)}")
        logging.info(
            f"规格文件PAD ID数据类型: {self.spec_data['PAD ID'].dtype if 'PAD ID' in self.spec_data.columns else 'N/A'}")

    except Exception as e:
        logging.error(f"加载规格数据失败: {e}")
        return False
    return True
|
||||
|
||||
def scan_data_files(self):
    """Collect candidate CSV data files from the selected folder.

    A file qualifies when its name contains "F27140015X3K" and its header
    row matches enough of the required fields (see _is_valid_csv_file).
    Populates ``self.data_files``; returns True when at least one valid
    file was found.
    """
    try:
        # Fields a measurement CSV is expected to expose (whitespace-trimmed).
        required_fields = [
            "PAD ID", "Component ID", "Height(mil)", "Volume(%)",
            "Area(%)", "Volume(mil3)", "Area(mil2)"
        ]

        # 80% of the fields matching is accepted as "valid".
        field_match_threshold = 0.8

        # NOTE(review): the "F27140015X3K" part-number filter is hard-coded;
        # confirm whether other part numbers should be supported here.
        valid_files = []
        for file in os.listdir(self.data_folder):
            if file.endswith(".csv") and "F27140015X3K" in file:
                file_path = os.path.join(self.data_folder, file)

                # Keep only readable files with a matching header row.
                if self._is_valid_csv_file(file_path, required_fields, field_match_threshold):
                    valid_files.append(file_path)

        self.data_files = valid_files
        logging.info(
            f"找到 {len(self.data_files)} 个有效数据文件: {[os.path.basename(f) for f in self.data_files]}")

    except Exception as e:
        logging.error(f"扫描数据文件失败: {e}")
        return False

    return True if self.data_files else False
|
||||
|
||||
def _is_valid_csv_file(self, file_path, required_fields, threshold=1.0):
|
||||
"""检查CSV文件是否包含必需的字段"""
|
||||
try:
|
||||
# 尝试不同的编码
|
||||
encodings = ['utf-8', 'gbk', 'latin-1']
|
||||
|
||||
for encoding in encodings:
|
||||
try:
|
||||
with open(file_path, 'r', encoding=encoding) as f:
|
||||
first_line = f.readline().strip()
|
||||
|
||||
# 解析CSV标题行
|
||||
headers = [header.strip() for header in first_line.split(',')]
|
||||
|
||||
# 计算匹配的字段数量
|
||||
matched_fields = 0
|
||||
missing_fields = []
|
||||
|
||||
for required_field in required_fields:
|
||||
if required_field in headers:
|
||||
matched_fields += 1
|
||||
else:
|
||||
missing_fields.append(required_field)
|
||||
|
||||
# 计算匹配比例
|
||||
match_ratio = matched_fields / len(required_fields)
|
||||
|
||||
if match_ratio >= threshold:
|
||||
if missing_fields:
|
||||
logging.warning(
|
||||
f"文件 {os.path.basename(file_path)} 部分字段缺失: {missing_fields},但满足阈值要求")
|
||||
else:
|
||||
logging.info(f"文件 {os.path.basename(file_path)} 所有字段完整")
|
||||
return True
|
||||
else:
|
||||
logging.warning(
|
||||
f"文件 {os.path.basename(file_path)} 字段匹配率不足: {match_ratio:.1%},缺失字段: {missing_fields}")
|
||||
return False
|
||||
|
||||
except UnicodeDecodeError:
|
||||
continue # 尝试下一个编码
|
||||
|
||||
logging.error(f"无法读取文件 {os.path.basename(file_path)},尝试了所有编码")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"检查文件 {os.path.basename(file_path)} 时发生错误: {e}")
|
||||
return False
|
||||
|
||||
def load_and_clean_data_file(self, data_file):
    """Read one measurement CSV and normalise it for merging.

    Tries utf-8, then gbk, then latin-1; trims column names; coerces
    PAD ID to trimmed strings; maps near-miss column names onto the
    required ones. Returns the cleaned DataFrame, or None when required
    columns are missing or loading fails.
    """
    try:
        # First row is the header; fall through encodings until one decodes.
        try:
            data_df = pd.read_csv(data_file, header=0, encoding='utf-8')
        except UnicodeDecodeError:
            try:
                data_df = pd.read_csv(data_file, header=0, encoding='gbk')
            except UnicodeDecodeError:
                data_df = pd.read_csv(data_file, header=0, encoding='latin-1')

        # Trim whitespace from the column names.
        data_df = self.clean_column_names(data_df)

        logging.info(f"数据文件列名: {list(data_df.columns)}")

        # Work on a copy to avoid pandas' SettingWithCopyWarning downstream.
        data_df = data_df.copy()

        # PAD ID must be a trimmed string so joins against the spec file match.
        if 'PAD ID' in data_df.columns:
            data_df['PAD ID'] = data_df['PAD ID'].astype(str).str.strip()
            logging.info(f"数据文件PAD ID数据类型: {data_df['PAD ID'].dtype}")

        # Columns the merge step requires.
        required_columns = ["PAD ID", "Component ID", "Height(mil)", "Volume(%)", "Area(%)"]

        # Map column-name variants onto the canonical names above, using a
        # lenient match that ignores case and spaces.
        column_mapping = {}
        for required_col in required_columns:
            if required_col not in data_df.columns:
                # Look for a similarly named column.
                similar_cols = [col for col in data_df.columns if
                                required_col.lower().replace(" ", "") in col.lower().replace(" ", "")]
                if similar_cols:
                    column_mapping[required_col] = similar_cols[0]
                    logging.info(f"映射列: {required_col} -> {similar_cols[0]}")

        # Apply the renames found above.
        if column_mapping:
            data_df = data_df.rename(columns=column_mapping)

        missing_columns = [col for col in required_columns if col not in data_df.columns]
        if missing_columns:
            logging.error(f"数据文件中缺少以下列: {missing_columns}")
            logging.info(f"数据文件所有列: {list(data_df.columns)}")
            return None

        return data_df  # the cleaned copy

    except Exception as e:
        logging.error(f"加载数据文件失败: {e}")
        return None
|
||||
|
||||
def process_data(self):
    """Merge every scanned data file with the loaded spec data.

    For each file in ``self.data_files``: load and clean it, keep the
    measurement columns, inner-join it with ``self.spec_data`` on
    'PAD ID', divide the spec height-limit columns by 25.4 (unit
    conversion per the original design — presumably mm vs mil; confirm
    against the spec file's units), and collect the matched rows into
    ``self.merged_data``.

    Returns:
        bool: True when at least one file produced matched rows,
        False when there were no data files or none could be merged.
    """
    all_data = []
    total_files = len(self.data_files)

    if total_files == 0:
        logging.error("未找到任何数据文件")
        return False

    for idx, data_file in enumerate(self.data_files, 1):
        logging.info(f"处理数据文件 {idx}/{total_files}: {os.path.basename(data_file)}")
        try:
            # Load and clean the data file (encoding fallback, column mapping).
            data_df = self.load_and_clean_data_file(data_file)
            if data_df is None:
                logging.error(f"无法加载文件: {os.path.basename(data_file)}")
                continue

            # Measurement columns expected from each data file.
            required_columns = ["PAD ID", "Component ID", "Height(mil)", "Volume(%)", "Area(%)"]

            # Keep whichever of the required columns are actually present.
            available_columns = [col for col in required_columns if col in data_df.columns]
            if len(available_columns) != len(required_columns):
                missing = set(required_columns) - set(available_columns)
                logging.warning(f"文件 {os.path.basename(data_file)} 缺少列: {missing}")
                logging.info(f"可用的列: {available_columns}")
                # Continue with the available columns only; .copy() avoids
                # SettingWithCopyWarning on later assignments.
                data_df = data_df[available_columns].copy()
            else:
                # Select the required columns (as an independent copy).
                data_df = data_df[required_columns].copy()

            # Record provenance: which data file and which spec file.
            data_df["数据来源"] = os.path.basename(data_file)
            data_df["限制来源"] = os.path.basename(self.spec_file)

            # Debug: sample PAD IDs on both sides before the merge.
            logging.info(
                f"合并前 - 数据文件PAD ID示例: {data_df['PAD ID'].head(3).tolist() if 'PAD ID' in data_df.columns else 'N/A'}")
            logging.info(
                f"合并前 - 规格文件PAD ID示例: {self.spec_data['PAD ID'].head(3).tolist() if 'PAD ID' in self.spec_data.columns else 'N/A'}")

            # Limit columns expected from the spec file.
            spec_columns = ["PAD ID", "Component ID", "Vol_Min(%)", "Vol_Max(%)",
                            "Height_Low(mil)", "Height_High(mil)", "Area_Min(%)", "Area_Max(%)"]

            # Only select columns that exist.
            available_spec_columns = [col for col in spec_columns if col in self.spec_data.columns]
            # .copy() gives an independent frame and avoids SettingWithCopyWarning.
            spec_df = self.spec_data[available_spec_columns].copy()

            # Normalise the join key to stripped strings on the spec side too.
            if 'PAD ID' in spec_df.columns:
                spec_df['PAD ID'] = spec_df['PAD ID'].astype(str).str.strip()

            # Inner join on PAD ID; duplicate columns get _data/_spec suffixes.
            merged_df = pd.merge(data_df, spec_df, on="PAD ID", how="inner", suffixes=('_data', '_spec'))

            if merged_df.empty:
                logging.warning(f"文件 {os.path.basename(data_file)} 与规格数据无匹配项")
                # Debug info: how much key overlap there actually was.
                data_pad_ids = set(data_df['PAD ID'].unique()) if 'PAD ID' in data_df.columns else set()
                spec_pad_ids = set(spec_df['PAD ID'].unique()) if 'PAD ID' in spec_df.columns else set()
                common_ids = data_pad_ids.intersection(spec_pad_ids)
                logging.info(
                    f"数据文件PAD ID数量: {len(data_pad_ids)}, 规格文件PAD ID数量: {len(spec_pad_ids)}, 共同ID数量: {len(common_ids)}")
                logging.info(f"数据文件前5个PAD ID: {list(data_pad_ids)[:5] if data_pad_ids else 'N/A'}")
                logging.info(f"规格文件前5个PAD ID: {list(spec_pad_ids)[:5] if spec_pad_ids else 'N/A'}")
                continue

            # Prefer the Component ID that came from the data file when the
            # merge produced both a _data and a _spec variant.
            if 'Component ID_data' in merged_df.columns:
                merged_df['Component ID'] = merged_df['Component ID_data']
                # The _spec variant is intentionally kept for comparison:
                # merged_df.drop(columns=['Component ID_spec'], inplace=True, errors='ignore')

            # Unit conversion for the spec height limits: divide by 25.4.
            # Values are cleaned of non-numeric characters first so the
            # conversion cannot fail on stray text.
            convert_cols = ["Height_Low(mil)", "Height_High(mil)"]
            for col in convert_cols:
                if col in merged_df.columns:
                    before_non_null = merged_df[col].notna().sum()
                    # Strip everything except digits, dot and minus sign.
                    cleaned = merged_df[col].astype(str).str.replace(r'[^\d\.\-]+', '', regex=True)
                    merged_df[col] = pd.to_numeric(cleaned, errors='coerce') / 25.4
                    after_non_null = merged_df[col].notna().sum()
                    logging.info(
                        f"字段 {col} 已除以 25.4 完成单位转换,非空值数: 转换前 {before_non_null} -> 转换后 {after_non_null}"
                    )
                else:
                    logging.warning(f"规格高度字段缺失,无法进行单位转换: {col}")

            # Final output columns, in the required order.
            output_columns = [
                "PAD ID", "Component ID", "Vol_Min(%)", "Vol_Max(%)", "Height_Low(mil)",
                "Height_High(mil)", "Area_Min(%)", "Area_Max(%)", "Height(mil)", "Volume(%)", "Area(%)",
                "数据来源", "限制来源"
            ]

            # Only keep the columns that exist in the merged frame ...
            available_output_columns = [col for col in output_columns if col in merged_df.columns]

            # ... and warn about any expected column that is missing.
            missing_output_columns = [col for col in output_columns if col not in merged_df.columns]
            if missing_output_columns:
                logging.warning(
                    f"文件 {os.path.basename(data_file)} 的最终输出中缺少以下预期列: {missing_output_columns}")

            # Skip the file entirely when no expected column survived.
            if not available_output_columns:
                logging.error(f"文件 {os.path.basename(data_file)} 没有任何预期的输出列,将跳过此文件。")
                continue

            merged_df = merged_df[available_output_columns].copy()  # copy again for safety

            all_data.append(merged_df)
            logging.info(f"文件 {os.path.basename(data_file)} 处理成功,匹配 {len(merged_df)} 行")

        except Exception as e:
            logging.error(f"处理文件 {os.path.basename(data_file)} 时出错: {e}")
            # Extra debug information to help diagnose join-key problems.
            if 'data_df' in locals() and 'PAD ID' in data_df.columns:
                logging.info(f"数据文件PAD ID数据类型: {data_df['PAD ID'].dtype}")
                logging.info(f"数据文件PAD ID示例: {data_df['PAD ID'].head(3).tolist()}")
            if hasattr(self, 'spec_data') and 'PAD ID' in self.spec_data.columns:
                logging.info(f"规格文件PAD ID数据类型: {self.spec_data['PAD ID'].dtype}")
                logging.info(f"规格文件PAD ID示例: {self.spec_data['PAD ID'].head(3).tolist()}")
            continue

    if all_data:
        self.merged_data = pd.concat(all_data, ignore_index=True)
        logging.info(f"数据处理完成,共合并 {len(self.merged_data)} 行数据")
        logging.info(f"最终数据列名: {list(self.merged_data.columns)}")
    else:
        logging.error("未成功处理任何数据文件")
        return False
    return True
def save_to_excel(self):
    """Write the merged result to a timestamped Excel file.

    The file is written into the selected data folder as
    ``dataProcess_out_<YYYYmmdd_HHMMSS>.xlsx``, and a statistics
    summary is logged afterwards.  Failures are logged, never raised.
    """
    try:
        # Timestamp used to make the output filename unique.
        # NOTE(review): `datetime.datetime.now()` requires a top-of-file
        # `import datetime`; if this module instead uses
        # `from datetime import datetime`, this line raises
        # AttributeError — confirm against the file's imports.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"dataProcess_out_{timestamp}.xlsx"
        output_file = os.path.join(self.data_folder, output_filename)

        self.merged_data.to_excel(output_file, index=False)
        logging.info(f"数据已保存到: {output_file}")

        # Build a human-readable processing summary.
        stats = f"处理统计:\n"
        stats += f"- 规格文件: {os.path.basename(self.spec_file)}\n"
        stats += f"- 处理的数据文件数: {len(self.data_files)}\n"
        stats += f"- 合并的总行数: {len(self.merged_data)}\n"
        stats += f"- 输出文件: {output_file}\n"
        stats += f"- 包含的列: {list(self.merged_data.columns)}"

        logging.info(stats)
        # The original message box was removed; the summary is logged instead.
        logging.info("处理完成。\n" + stats)

    except Exception as e:
        logging.error(f"保存数据失败: {e}")
        # The original error message box was removed; the error is logged
        # (a second time) instead.
        logging.error(f"保存数据失败: {e}")
def run(self):
    """Drive the whole pipeline: selection, loading, merging, export.

    Each preparatory step returning a falsy value aborts the run; any
    unexpected exception is caught and logged.
    """
    logging.info("开始数据处理流程")

    try:
        # Preparatory steps, in order; a falsy return aborts silently
        # (each step logs its own reason).
        preparatory_steps = (
            self.select_spec_file,
            self.select_data_folder,
            self.load_spec_data,
            self.scan_data_files,
        )
        for step in preparatory_steps:
            if not step():
                return

        if not self.process_data():
            # The original error message box was removed; log instead.
            logging.error("数据处理失败,请检查日志信息")
            return

        self.save_to_excel()

    except Exception as e:
        logging.error(f"处理流程出错: {e}")
        # The original error message box was removed; log instead.
        logging.error(f"处理过程中出现错误:\n{e}")
if __name__ == "__main__":
    # Script entry point: build the processor and run the full pipeline.
    processor = DataProcessor()
    processor.run()
1060
dataProcess/dataProcess_html_V1.py
Normal file
1060
dataProcess/dataProcess_html_V1.py
Normal file
File diff suppressed because it is too large
Load Diff
810
dataProcess/dataProcess_sightml_V1.py
Normal file
810
dataProcess/dataProcess_sightml_V1.py
Normal file
@@ -0,0 +1,810 @@
|
||||
import pandas as pd
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
import os
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
|
||||
|
||||
class DataProcessor:
|
||||
def __init__(self):
|
||||
self.data = None
|
||||
self.filename = None
|
||||
self.file_path = None
|
||||
self.file_dir = None
|
||||
self.processing_start_time = None
|
||||
|
||||
def select_file(self):
    """Ask the user to pick a data file via a Tk open-file dialog.

    Stores the path, base name and directory on the instance.

    Returns:
        bool: True when a file was chosen, False when the dialog was
        cancelled.
    """
    print("🔍 打开文件选择对话框...")
    dialog_root = tk.Tk()
    dialog_root.withdraw()  # hide the empty Tk main window
    # NOTE(review): the hidden Tk root is never destroyed — this matches
    # the original behaviour.

    self.file_path = filedialog.askopenfilename(
        title="选择数据文件",
        filetypes=[("Excel files", "*.xlsx"), ("CSV files", "*.csv"), ("All files", "*.*")]
    )

    # Guard clause: the user cancelled the dialog.
    if not self.file_path:
        print("❌ 未选择文件")
        return False

    self.filename = os.path.basename(self.file_path)
    self.file_dir = os.path.dirname(self.file_path)
    print(f"✅ 已选择文件: {self.filename}")
    print(f"📁 文件所在目录: {self.file_dir}")
    return True
def _load_data(self):
    """Read the selected file into ``self.data`` and print a preview.

    Supports ``.csv`` and ``.xlsx``.  Raises ValueError for any other
    extension and re-raises whatever pandas throws on a bad file.
    """
    print("📥 开始加载数据文件...")
    try:
        # Dispatch on the file extension.
        suffix_readers = {
            '.csv': (pd.read_csv, "✅ 成功加载CSV文件"),
            '.xlsx': (pd.read_excel, "✅ 成功加载Excel文件"),
        }
        for suffix, (reader, ok_message) in suffix_readers.items():
            if self.file_path.endswith(suffix):
                self.data = reader(self.file_path)
                print(ok_message)
                break
        else:
            raise ValueError("不支持的文件格式")

        print(f"📊 数据文件形状: {self.data.shape}")
        print(f"📋 数据列名: {list(self.data.columns)[:10]}...")

        # Show the first few rows so the user can sanity-check the load.
        print("\n📋 数据预览(前3行):")
        print(self.data.head(3))

        # Show the dtypes of the first ten columns.
        print("\n📊 列数据类型:")
        for column in self.data.columns[:10]:
            print(f" {column}: {self.data[column].dtype}")

    except Exception as e:
        print(f"❌ 加载数据文件时出错: {e}")
        raise
def _validate_data(self):
    """Validate that ``self.data`` has all required columns and usable types.

    Raises ValueError when a required measurement or limit column is
    missing.  Also reports null counts, prints per-column statistics,
    and coerces the numeric columns with ``pd.to_numeric`` (non-numeric
    entries become NaN), mutating ``self.data`` in place.
    """
    print("🔍 验证数据完整性...")

    # Required measurement columns.
    required_measure_columns = ['PAD ID', 'Component ID', 'Height(mil)', 'Volume(%)', 'Area(%)']
    missing_measure_columns = [col for col in required_measure_columns if col not in self.data.columns]

    if missing_measure_columns:
        error_msg = f"❌ 数据文件中缺少必要的测量列: {missing_measure_columns}"
        print(error_msg)
        raise ValueError(error_msg)

    # Required upper/lower-limit columns.
    required_limit_columns = ['Height_Low(mil)', 'Height_High(mil)', 'Vol_Min(%)', 'Vol_Max(%)', 'Area_Min(%)',
                              'Area_Max(%)']
    missing_limit_columns = [col for col in required_limit_columns if col not in self.data.columns]

    if missing_limit_columns:
        error_msg = f"❌ 数据文件中缺少必要的上下限列: {missing_limit_columns}"
        print(error_msg)
        raise ValueError(error_msg)

    print("✅ 数据验证通过")

    # Report null values in all required columns.
    all_columns = required_measure_columns + required_limit_columns
    null_counts = self.data[all_columns].isnull().sum()
    if null_counts.any():
        print(f"⚠️ 数据中存在空值:")
        for col, count in null_counts[null_counts > 0].items():
            print(f" {col}: {count} 个空值")
    else:
        print("✅ 所有必需列都没有空值")

    # Per-column statistics overview.
    print("\n📊 数据统计信息:")

    for col in required_measure_columns:
        if col in self.data.columns:
            # Format differently depending on the column's dtype.
            if pd.api.types.is_numeric_dtype(self.data[col]):
                valid_count = self.data[col].count()
                if valid_count > 0:
                    min_val = self.data[col].min()
                    max_val = self.data[col].max()
                    print(f" {col}: {valid_count} 个有效值, 范围 {min_val:.4f} - {max_val:.4f}")
                else:
                    print(f" {col}: 0 个有效值")
            else:
                # Non-numeric column: show unique count and a few samples.
                unique_count = self.data[col].nunique()
                sample_values = self.data[col].dropna().head(3).tolist()
                print(
                    f" {col}: {self.data[col].count()} 个有效值, {unique_count} 个唯一值, 示例: {sample_values}")

    # Coerce the numeric columns, reporting any values lost to coercion.
    print("\n🔄 数据类型检查与转换:")
    numeric_columns = ['Height(mil)', 'Volume(%)', 'Area(%)',
                       'Height_Low(mil)', 'Height_High(mil)',
                       'Vol_Min(%)', 'Vol_Max(%)', 'Area_Min(%)', 'Area_Max(%)']

    for col in numeric_columns:
        if col in self.data.columns:
            if not pd.api.types.is_numeric_dtype(self.data[col]):
                try:
                    # Convert to numeric; non-numeric entries become NaN.
                    original_count = self.data[col].count()
                    self.data[col] = pd.to_numeric(self.data[col], errors='coerce')
                    converted_count = self.data[col].count()
                    lost_data = original_count - converted_count
                    if lost_data > 0:
                        print(f" ⚠️ {col}: 转换后丢失 {lost_data} 个非数值数据")
                    else:
                        print(f" ✅ {col}: 成功转换为数值类型")
                except Exception as e:
                    print(f" ❌ {col}: 类型转换失败 - {e}")
            else:
                valid_count = self.data[col].count()
                print(f" ✅ {col}: 已经是数值类型, {valid_count} 个有效值")
def _print_progress(self, message, level=1):
    """Print *message* with an HH:MM:SS stamp, indented by *level* spaces."""
    stamp = datetime.now().strftime("%H:%M:%S")
    print(f"{stamp} {' ' * level}{message}")
def generate_report(self):
    """Generate the statistics report for the loaded data.

    Validates the data, groups rows by ``PAD ID + '_' + Component ID``,
    computes min/max/mean/std per group for Height/Volume/Area, joins
    the preset limits, computes CPK values, prints a CPK summary and
    writes an HTML report.

    Returns:
        The report file path produced by ``_create_html_report``.

    Raises:
        ValueError: when no data file has been loaded yet.
        Exception: any processing error is printed with a traceback and
            re-raised.
    """
    if self.data is None:
        raise ValueError("请先选择数据文件")

    try:
        self.processing_start_time = datetime.now()
        print(f"\n🚀 开始生成报告 - {self.processing_start_time.strftime('%Y-%m-%d %H:%M:%S')}")

        # Validate columns/dtypes (mutates self.data via numeric coercion).
        self._validate_data()

        self._print_progress("开始数据处理...", 1)

        # Build the grouping key.
        self._print_progress("创建分组键...", 2)

        # Both key parts must be strings before concatenation.
        self.data['PAD ID'] = self.data['PAD ID'].astype(str)
        self.data['Component ID'] = self.data['Component ID'].astype(str)

        self.data['Group_Key'] = self.data['PAD ID'] + '_' + self.data['Component ID']
        group_count = self.data['Group_Key'].nunique()
        self._print_progress(f"共发现 {group_count} 个分组", 2)

        # Show the five largest groups.
        group_info = self.data['Group_Key'].value_counts()
        self._print_progress(f"分组数据量统计:", 2)
        for i, (group, count) in enumerate(group_info.head(5).items()):
            self._print_progress(f" {group}: {count} 个数据点", 3)
        if len(group_info) > 5:
            self._print_progress(f" ... 还有 {len(group_info) - 5} 个分组", 3)

        # Warn about NaN values in the measurement columns.
        numeric_columns = ['Height(mil)', 'Volume(%)', 'Area(%)']
        for col in numeric_columns:
            if col in self.data.columns:
                nan_count = self.data[col].isna().sum()
                if nan_count > 0:
                    self._print_progress(f"⚠️ {col} 有 {nan_count} 个空值,将在统计计算中排除", 3)

        # Compute the basic statistics.
        self._print_progress("计算基本统计信息...", 2)

        # Replace infinities with NaN so they do not skew the statistics.
        for col in numeric_columns:
            if col in self.data.columns:
                inf_count = np.isinf(self.data[col]).sum()
                if inf_count > 0:
                    self._print_progress(f"⚠️ {col} 有 {inf_count} 个无穷大值,将替换为NaN", 3)
                    self.data[col] = self.data[col].replace([np.inf, -np.inf], np.nan)

        stats = self.data.groupby('Group_Key').agg({
            'Height(mil)': ['min', 'max', 'mean', 'std'],
            'Volume(%)': ['min', 'max', 'mean', 'std'],
            'Area(%)': ['min', 'max', 'mean', 'std']
        }).round(4)

        # Flatten the MultiIndex columns produced by agg.
        stats.columns = [
            'Height_Measured_Min(mil)', 'Height_Measured_Max(mil)', 'Height_Mean(mil)', 'Height_Std(mil)',
            'Volume_Measured_Min(%)', 'Volume_Measured_Max(%)', 'Volume_Mean(%)', 'Volume_Std(%)',
            'Area_Measured_Min(%)', 'Area_Measured_Max(%)', 'Area_Mean(%)', 'Area_Std(%)'
        ]

        self._print_progress("基本统计信息计算完成", 2)

        # Pick the preset limits (first value per group).
        self._print_progress("获取预设上下限信息...", 2)
        limits = self.data.groupby('Group_Key').agg({
            'Height_Low(mil)': 'first',
            'Height_High(mil)': 'first',
            'Vol_Min(%)': 'first',
            'Vol_Max(%)': 'first',
            'Area_Min(%)': 'first',
            'Area_Max(%)': 'first'
        }).round(4)

        # Join the statistics with the limit columns.
        stats = pd.concat([stats, limits], axis=1)
        self._print_progress("上下限信息获取完成", 2)

        # Compute CPK values per group.
        self._print_progress("开始计算CPK值...", 2)
        stats = self._calculate_cpk(stats)

        # Analyse and summarise the CPK results.
        cpk_analysis = self._analyze_cpk_results(stats)
        self._print_progress("CPK分析完成", 2)
        self._print_cpk_summary(cpk_analysis)

        # Produce the HTML report.
        self._print_progress("生成HTML报告...", 2)
        report_path = self._create_html_report(stats, cpk_analysis)
        self._print_progress("HTML报告生成完成", 2)

        # Report total processing time.
        processing_time = datetime.now() - self.processing_start_time
        self._print_progress(f"总处理时间: {processing_time.total_seconds():.2f} 秒", 1)

        return report_path

    except Exception as e:
        print(f"❌ 生成报告过程中出错: {e}")
        import traceback
        print(f"详细错误信息:")
        traceback.print_exc()
        raise
def _analyze_cpk_results(self, stats):
|
||||
"""分析CPK结果"""
|
||||
cpk_analysis = {
|
||||
'total_groups': len(stats),
|
||||
'cpk_status': {'Height': {}, 'Volume': {}, 'Area': {}},
|
||||
'problematic_groups': []
|
||||
}
|
||||
|
||||
for feature in ['Height', 'Volume', 'Area']:
|
||||
cpk_col = f'{feature}_Cpk'
|
||||
if cpk_col not in stats.columns:
|
||||
continue
|
||||
|
||||
valid_cpk = stats[cpk_col].dropna()
|
||||
total_valid = len(valid_cpk)
|
||||
|
||||
cpk_analysis['cpk_status'][feature] = {
|
||||
'total': total_valid,
|
||||
'excellent': len(valid_cpk[valid_cpk >= 1.33]) if total_valid > 0 else 0,
|
||||
'acceptable': len(valid_cpk[(valid_cpk >= 1.0) & (valid_cpk < 1.33)]) if total_valid > 0 else 0,
|
||||
'poor': len(valid_cpk[valid_cpk < 1.0]) if total_valid > 0 else 0,
|
||||
'invalid': len(stats) - total_valid
|
||||
}
|
||||
|
||||
# 识别有问题的分组(任意特征的CPK < 1.0)
|
||||
for group_key, row in stats.iterrows():
|
||||
problems = []
|
||||
for feature in ['Height', 'Volume', 'Area']:
|
||||
cpk_col = f'{feature}_Cpk'
|
||||
if cpk_col in stats.columns and not pd.isna(row[cpk_col]):
|
||||
if row[cpk_col] < 1.0:
|
||||
problems.append(f"{feature}: {row[cpk_col]:.4f}")
|
||||
|
||||
if problems:
|
||||
cpk_analysis['problematic_groups'].append({
|
||||
'group_key': group_key,
|
||||
'problems': problems
|
||||
})
|
||||
|
||||
return cpk_analysis
|
||||
|
||||
def _print_cpk_summary(self, cpk_analysis):
|
||||
"""打印CPK结果摘要"""
|
||||
print("\n📈 CPK分析结果摘要:")
|
||||
print("=" * 60)
|
||||
|
||||
for feature, status in cpk_analysis['cpk_status'].items():
|
||||
total = status['total']
|
||||
if total == 0:
|
||||
print(f"\n{feature}: 无有效CPK数据")
|
||||
continue
|
||||
|
||||
print(f"\n{feature}:")
|
||||
excellent_pct = (status['excellent'] / total * 100) if total > 0 else 0
|
||||
acceptable_pct = (status['acceptable'] / total * 100) if total > 0 else 0
|
||||
poor_pct = (status['poor'] / total * 100) if total > 0 else 0
|
||||
|
||||
print(f" ✅ 优秀 (CPK ≥ 1.33): {status['excellent']}/{total} ({excellent_pct:.1f}%)")
|
||||
print(f" ⚠️ 合格 (1.0 ≤ CPK < 1.33): {status['acceptable']}/{total} ({acceptable_pct:.1f}%)")
|
||||
print(f" ❌ 不合格 (CPK < 1.0): {status['poor']}/{total} ({poor_pct:.1f}%)")
|
||||
print(f" ❓ 无法计算: {status['invalid']}")
|
||||
|
||||
if cpk_analysis['problematic_groups']:
|
||||
print(f"\n⚠️ 发现 {len(cpk_analysis['problematic_groups'])} 个有问题分组:")
|
||||
for i, group in enumerate(cpk_analysis['problematic_groups'][:10]):
|
||||
print(f" {i + 1}. {group['group_key']}: {', '.join(group['problems'])}")
|
||||
if len(cpk_analysis['problematic_groups']) > 10:
|
||||
print(f" ... 还有 {len(cpk_analysis['problematic_groups']) - 10} 个问题分组")
|
||||
else:
|
||||
print("\n✅ 所有分组的CPK都在合格范围内")
|
||||
|
||||
print("=" * 60)
|
||||
|
||||
def _calculate_cpk(self, stats):
|
||||
"""计算CPK值"""
|
||||
self._print_progress("详细计算CPK值...", 3)
|
||||
|
||||
def calculate_single_cpk(mean, std, usl, lsl):
|
||||
"""计算单个特征的CPK"""
|
||||
if pd.isna(mean) or pd.isna(std) or std == 0:
|
||||
return np.nan
|
||||
|
||||
if pd.isna(usl) or pd.isna(lsl):
|
||||
return np.nan
|
||||
|
||||
try:
|
||||
cpu = (usl - mean) / (3 * std) if usl != float('inf') else float('inf')
|
||||
cpl = (mean - lsl) / (3 * std) if lsl != float('-inf') else float('inf')
|
||||
|
||||
if cpu == float('inf') and cpl == float('inf'):
|
||||
return np.nan
|
||||
elif cpu == float('inf'):
|
||||
return cpl
|
||||
elif cpl == float('inf'):
|
||||
return cpu
|
||||
else:
|
||||
return min(cpu, cpl)
|
||||
except (ZeroDivisionError, TypeError):
|
||||
return np.nan
|
||||
|
||||
# 计算每个特征的CPK
|
||||
cpk_results = []
|
||||
total_groups = len(stats)
|
||||
|
||||
for idx, row in stats.iterrows():
|
||||
if len(cpk_results) % 100 == 0 and total_groups > 100:
|
||||
self._print_progress(f"计算第 {len(cpk_results) + 1} 个分组的CPK...", 4)
|
||||
|
||||
# Height CPK
|
||||
height_cpk = calculate_single_cpk(
|
||||
row.get('Height_Mean(mil)', np.nan),
|
||||
row.get('Height_Std(mil)', np.nan),
|
||||
row.get('Height_High(mil)', np.nan),
|
||||
row.get('Height_Low(mil)', np.nan)
|
||||
)
|
||||
|
||||
# Volume CPK
|
||||
volume_cpk = calculate_single_cpk(
|
||||
row.get('Volume_Mean(%)', np.nan),
|
||||
row.get('Volume_Std(%)', np.nan),
|
||||
row.get('Vol_Max(%)', np.nan),
|
||||
row.get('Vol_Min(%)', np.nan)
|
||||
)
|
||||
|
||||
# Area CPK
|
||||
area_cpk = calculate_single_cpk(
|
||||
row.get('Area_Mean(%)', np.nan),
|
||||
row.get('Area_Std(%)', np.nan),
|
||||
row.get('Area_Max(%)', np.nan),
|
||||
row.get('Area_Min(%)', np.nan)
|
||||
)
|
||||
|
||||
cpk_results.append({
|
||||
'Height_Cpk': round(height_cpk, 4) if not pd.isna(height_cpk) else np.nan,
|
||||
'Volume_Cpk': round(volume_cpk, 4) if not pd.isna(volume_cpk) else np.nan,
|
||||
'Area_Cpk': round(area_cpk, 4) if not pd.isna(area_cpk) else np.nan
|
||||
})
|
||||
|
||||
# 将CPK结果添加到统计数据中
|
||||
cpk_df = pd.DataFrame(cpk_results, index=stats.index)
|
||||
stats = pd.concat([stats, cpk_df], axis=1)
|
||||
|
||||
self._print_progress(f"所有 {len(stats)} 个分组CPK计算完成", 3)
|
||||
return stats
|
||||
|
||||
def _get_cpk_status_class(self, cpk_value):
|
||||
"""根据CPK值返回状态类别"""
|
||||
if pd.isna(cpk_value):
|
||||
return 'cpk-invalid'
|
||||
elif cpk_value >= 1.33:
|
||||
return 'cpk-excellent'
|
||||
elif cpk_value >= 1.0:
|
||||
return 'cpk-acceptable'
|
||||
else:
|
||||
return 'cpk-poor'
|
||||
|
||||
def _create_html_report(self, stats, cpk_analysis):
|
||||
"""创建完整的HTML报告"""
|
||||
self._print_progress("构建HTML报告内容...", 3)
|
||||
|
||||
total_groups = len(stats)
|
||||
|
||||
# 完整的HTML模板
|
||||
html_content = f"""<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>数据统计报告 - {self.filename}</title>
|
||||
<style>
|
||||
:root {{
|
||||
--color-excellent: #4CAF50;
|
||||
--color-acceptable: #FFC107;
|
||||
--color-poor: #F44336;
|
||||
--color-invalid: #9E9E9E;
|
||||
}}
|
||||
|
||||
body {{
|
||||
font-family: 'Segoe UI', Arial, sans-serif;
|
||||
margin: 20px;
|
||||
line-height: 1.6;
|
||||
background-color: #f8f9fa;
|
||||
}}
|
||||
|
||||
.container {{
|
||||
max-width: 95%;
|
||||
margin: 0 auto;
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
}}
|
||||
|
||||
h1 {{
|
||||
color: #2c3e50;
|
||||
border-bottom: 3px solid #3498db;
|
||||
padding-bottom: 10px;
|
||||
text-align: center;
|
||||
}}
|
||||
|
||||
h2 {{
|
||||
color: #34495e;
|
||||
margin-top: 30px;
|
||||
padding: 15px;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
border-radius: 5px;
|
||||
}}
|
||||
|
||||
.summary {{
|
||||
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
|
||||
color: white;
|
||||
padding: 20px;
|
||||
border-radius: 10px;
|
||||
margin-bottom: 30px;
|
||||
}}
|
||||
|
||||
.cpk-dashboard {{
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||
gap: 20px;
|
||||
margin: 20px 0;
|
||||
}}
|
||||
|
||||
.cpk-card {{
|
||||
background: white;
|
||||
padding: 20px;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
||||
text-align: center;
|
||||
}}
|
||||
|
||||
.cpk-excellent {{ background-color: var(--color-excellent); color: white; }}
|
||||
.cpk-acceptable {{ background-color: var(--color-acceptable); color: black; }}
|
||||
.cpk-poor {{ background-color: var(--color-poor); color: white; }}
|
||||
.cpk-invalid {{ background-color: var(--color-invalid); color: white; }}
|
||||
|
||||
table {{
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin-top: 20px;
|
||||
font-size: 12px;
|
||||
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
||||
background: white;
|
||||
}}
|
||||
|
||||
th, td {{
|
||||
border: 1px solid #ddd;
|
||||
padding: 12px;
|
||||
text-align: center;
|
||||
}}
|
||||
|
||||
th {{
|
||||
background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
|
||||
color: white;
|
||||
font-weight: bold;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
}}
|
||||
|
||||
tr:nth-child(even) {{ background-color: #f8f9fa; }}
|
||||
tr:hover {{ background-color: #e3f2fd; }}
|
||||
|
||||
.limits {{
|
||||
background-color: #e8f5e8;
|
||||
font-weight: bold;
|
||||
color: #2e7d32;
|
||||
}}
|
||||
|
||||
.measured {{
|
||||
background-color: #fff3cd;
|
||||
color: #856404;
|
||||
}}
|
||||
|
||||
.problematic-row {{
|
||||
background-color: #ffebee !important;
|
||||
border-left: 4px solid var(--color-poor);
|
||||
}}
|
||||
|
||||
.warning-box {{
|
||||
background: #fff3cd;
|
||||
border-left: 4px solid #ffc107;
|
||||
padding: 15px;
|
||||
margin: 20px 0;
|
||||
border-radius: 5px;
|
||||
}}
|
||||
|
||||
.chart-container {{
|
||||
margin: 20px 0;
|
||||
padding: 20px;
|
||||
background: white;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
||||
}}
|
||||
|
||||
.legend {{
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 20px;
|
||||
margin: 20px 0;
|
||||
flex-wrap: wrap;
|
||||
}}
|
||||
|
||||
.legend-item {{
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 5px;
|
||||
padding: 5px 10px;
|
||||
border-radius: 3px;
|
||||
}}
|
||||
|
||||
.na {{ color: #999; font-style: italic; }}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>📊 数据统计报告 - {self.filename}</h1>
|
||||
<p><strong>生成时间:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
|
||||
<p><strong>输入文件:</strong> {self.filename}</p>
|
||||
|
||||
<div class="summary">
|
||||
<h2>📈 报告摘要</h2>
|
||||
<p><strong>总分组数量:</strong> {total_groups}</p>
|
||||
<p><strong>处理时间:</strong> {(datetime.now() - self.processing_start_time).total_seconds():.2f} 秒</p>
|
||||
</div>
|
||||
|
||||
<!-- CPK状态仪表板 -->
|
||||
<div class="cpk-dashboard">
|
||||
"""
|
||||
|
||||
# 添加CPK状态卡片
|
||||
for feature, status in cpk_analysis['cpk_status'].items():
|
||||
total = status['total'] + status['invalid']
|
||||
if total == 0:
|
||||
continue
|
||||
|
||||
html_content += f"""
|
||||
<div class="cpk-card">
|
||||
<h3>{feature} CPK状态</h3>
|
||||
<div style="font-size: 2em; font-weight: bold; margin: 10px 0;">
|
||||
{status['excellent'] + status['acceptable']}/{total}
|
||||
</div>
|
||||
<p>合格率: {(status['excellent'] + status['acceptable']) / total * 100:.1f}%</p>
|
||||
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px;">
|
||||
<span class="legend-item cpk-excellent">优秀: {status['excellent']}</span>
|
||||
<span class="legend-item cpk-acceptable">合格: {status['acceptable']}</span>
|
||||
<span class="legend-item cpk-poor">不合格: {status['poor']}</span>
|
||||
<span class="legend-item cpk-invalid">无效: {status['invalid']}</span>
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
html_content += f"""
|
||||
</div>
|
||||
|
||||
<!-- 问题分组警告 -->
|
||||
{f'<div class="warning-box"><h3>⚠️ 发现 {len(cpk_analysis["problematic_groups"])} 个问题分组</h3><p>以下分组的CPK值低于1.0,需要重点关注</p></div>' if cpk_analysis['problematic_groups'] else ''}
|
||||
|
||||
<h2>📋 详细统计数据</h2>
|
||||
|
||||
<div class="legend">
|
||||
<span class="legend-item" style="background-color: #e8f5e8;">预设上下限</span>
|
||||
<span class="legend-item" style="background-color: #fff3cd;">实测值</span>
|
||||
<span class="legend-item cpk-excellent">CPK ≥ 1.33</span>
|
||||
<span class="legend-item cpk-acceptable">1.0 ≤ CPK < 1.33</span>
|
||||
<span class="legend-item cpk-poor">CPK < 1.0</span>
|
||||
</div>
|
||||
|
||||
<div style="overflow-x: auto;">
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th rowspan="2">分组标识</th>
|
||||
<th colspan="7">Height(mil)</th>
|
||||
<th colspan="7">Volume(%)</th>
|
||||
<th colspan="7">Area(%)</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<!-- Height列标题 -->
|
||||
<th class="limits">预设下限</th>
|
||||
<th class="limits">预设上限</th>
|
||||
<th class="measured">实测最小值</th>
|
||||
<th class="measured">实测最大值</th>
|
||||
<th>平均值</th>
|
||||
<th>标准差</th>
|
||||
<th>CPK</th>
|
||||
<!-- Volume列标题 -->
|
||||
<th class="limits">预设下限</th>
|
||||
<th class="limits">预设上限</th>
|
||||
<th class="measured">实测最小值</th>
|
||||
<th class="measured">实测最大值</th>
|
||||
<th>平均值</th>
|
||||
<th>标准差</th>
|
||||
<th>CPK</th>
|
||||
<!-- Area列标题 -->
|
||||
<th class="limits">预设下限</th>
|
||||
<th class="limits">预设上限</th>
|
||||
<th class="measured">实测最小值</th>
|
||||
<th class="measured">实测最大值</th>
|
||||
<th>平均值</th>
|
||||
<th>标准差</th>
|
||||
<th>CPK</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
"""
|
||||
|
||||
# 生成表格行数据的辅助函数
|
||||
def format_value(value):
|
||||
if pd.isna(value):
|
||||
return '<span class="na">N/A</span>'
|
||||
elif isinstance(value, (int, float)):
|
||||
return f"{value:.4f}"
|
||||
else:
|
||||
return str(value)
|
||||
|
||||
# 用于检查列是否存在的辅助函数
|
||||
def safe_get_value(row, column_name):
    """Fetch ``row[column_name]`` from a pandas row; NaN when the column is absent."""
    return row[column_name] if column_name in row.index else np.nan
|
||||
|
||||
for group_key, row in stats.iterrows():
|
||||
# 检查是否为问题分组
|
||||
is_problematic = any(problem['group_key'] == group_key for problem in cpk_analysis['problematic_groups'])
|
||||
row_class = 'class="problematic-row"' if is_problematic else ''
|
||||
|
||||
html_content += f"""
|
||||
<tr {row_class}>
|
||||
<td><strong>{group_key}</strong>{' ⚠️' if is_problematic else ''}</td>
|
||||
"""
|
||||
|
||||
# 为每个特征生成列
|
||||
for feature in ['Height', 'Volume', 'Area']:
|
||||
cpk_value = safe_get_value(row, f'{feature}_Cpk')
|
||||
cpk_class = self._get_cpk_status_class(cpk_value)
|
||||
|
||||
# 为不同特征设置正确的列名
|
||||
if feature == 'Height':
|
||||
lower_limit_col = 'Height_Low(mil)'
|
||||
upper_limit_col = 'Height_High(mil)'
|
||||
measured_min_col = 'Height_Measured_Min(mil)'
|
||||
measured_max_col = 'Height_Measured_Max(mil)'
|
||||
mean_col = 'Height_Mean(mil)'
|
||||
std_col = 'Height_Std(mil)'
|
||||
else:
|
||||
lower_limit_col = f"{'Vol' if feature == 'Volume' else 'Area'}_Min(%)" # 修正:Volume使用Vol_Min(%),Area使用Area_Min(%)
|
||||
upper_limit_col = f"{'Vol' if feature == 'Volume' else 'Area'}_Max(%)" # 修正:Volume使用Vol_Max(%),Area使用Area_Max(%)
|
||||
measured_min_col = f'{feature}_Measured_Min(%)'
|
||||
measured_max_col = f'{feature}_Measured_Max(%)'
|
||||
mean_col = f'{feature}_Mean(%)'
|
||||
std_col = f'{feature}_Std(%)'
|
||||
|
||||
html_content += f"""
|
||||
<!-- {feature}数据 -->
|
||||
<td class="limits">{format_value(safe_get_value(row, lower_limit_col))}</td>
|
||||
<td class="limits">{format_value(safe_get_value(row, upper_limit_col))}</td>
|
||||
<td class="measured">{format_value(safe_get_value(row, measured_min_col))}</td>
|
||||
<td class="measured">{format_value(safe_get_value(row, measured_max_col))}</td>
|
||||
<td>{format_value(safe_get_value(row, mean_col))}</td>
|
||||
<td>{format_value(safe_get_value(row, std_col))}</td>
|
||||
<td class="{cpk_class}">{format_value(cpk_value)}</td>
|
||||
"""
|
||||
|
||||
html_content += """
|
||||
</tr>"""
|
||||
|
||||
html_content += """
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="chart-container">
|
||||
<h2>📊 CPK状态分布</h2>
|
||||
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px;">
|
||||
"""
|
||||
|
||||
# 添加简单的CPK分布图表
|
||||
for feature, status in cpk_analysis['cpk_status'].items():
|
||||
total = status['total'] + status['invalid']
|
||||
if total == 0:
|
||||
continue
|
||||
|
||||
html_content += f"""
|
||||
<div>
|
||||
<h3>{feature} CPK分布</h3>
|
||||
<div style="background: #f8f9fa; padding: 20px; border-radius: 5px;">
|
||||
<div style="display: flex; height: 30px; margin: 10px 0; border-radius: 5px; overflow: hidden;">
|
||||
<div style="background: var(--color-excellent); width: {status['excellent'] / total * 100}%;"></div>
|
||||
<div style="background: var(--color-acceptable); width: {status['acceptable'] / total * 100}%;"></div>
|
||||
<div style="background: var(--color-poor); width: {status['poor'] / total * 100}%;"></div>
|
||||
<div style="background: var(--color-invalid); width: {status['invalid'] / total * 100}%;"></div>
|
||||
</div>
|
||||
<div style="text-align: center;">
|
||||
<small>优秀 {status['excellent']} | 合格 {status['acceptable']} | 不合格 {status['poor']} | 无效 {status['invalid']}</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
html_content += """
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
# 保存报告
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
report_filename = f"{os.path.splitext(self.filename)[0]}_report_{timestamp}.html"
|
||||
report_path = os.path.join(self.file_dir, report_filename)
|
||||
|
||||
self._print_progress(f"保存报告到: {report_path}", 3)
|
||||
with open(report_path, 'w', encoding='utf-8') as f:
|
||||
f.write(html_content)
|
||||
|
||||
return report_path
|
||||
|
||||
|
||||
def main():
    """Entry point: pick a file, load it, and generate the statistics report.

    All errors are caught at this top-level boundary, reported to the console
    and accompanied by a full traceback for debugging.
    """
    banner = "=" * 60
    print(banner)
    print("🚀 数据统计报告生成程序 - Volume上下限修复版")
    print(banner)

    proc = DataProcessor()

    try:
        if not proc.select_file():
            print("❌ 未选择文件,程序退出")
        else:
            proc._load_data()
            report_path = proc.generate_report()

            print("\n" + banner)
            print("✅ 程序执行完成")
            print(f"📄 统计报告生成成功: {report_path}")
            print(banner)
    except Exception as e:
        print(f"\n❌ 程序执行失败: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
|
||||
17
htmlProcess/.gitignore
vendored
Normal file
17
htmlProcess/.gitignore
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
|
||||
|
||||
|
||||
|
||||
htmlReportProcess_Merge_picHtml_V3.py
|
||||
|
||||
htmlReportProcess_Merge_picHtml_V2.py
|
||||
|
||||
htmlReportProcess_Merge_pic_V2.py
|
||||
|
||||
/htmlReportProcess*/
|
||||
11
htmlProcess/README.md
Normal file
11
htmlProcess/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Sample GitLab Project
|
||||
|
||||
This sample project shows how a project in GitLab looks for demonstration purposes. It contains issues, merge requests and Markdown files in many branches,
|
||||
named and filled with lorem ipsum.
|
||||
|
||||
You can look around to get an idea how to structure your project and, when done, you can safely delete this project.
|
||||
|
||||
[Learn more about creating GitLab projects.](https://docs.gitlab.com/ee/gitlab-basics/create-project.html)
|
||||
|
||||
html文件的报告自动分析和处理数据的工具脚本
|
||||
|
||||
926
htmlProcess/htmlReportProcess_Merge_picHtml_V1.py
Normal file
926
htmlProcess/htmlReportProcess_Merge_picHtml_V1.py
Normal file
@@ -0,0 +1,926 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
from matplotlib.lines import Line2D
|
||||
from typing import Optional, Tuple, List, Dict, Any, Union
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from jinja2 import Template
|
||||
from colorama import Fore, Style, init
|
||||
|
||||
# Silence pandas SettingWithCopy warnings so console progress output stays readable.
pd.options.mode.chained_assignment = None

# Matplotlib font setup: list CJK-capable fonts first so Chinese titles/labels
# render, and keep the ASCII minus sign so negative ticks are not shown as boxes.
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans', 'Arial Unicode MS', 'Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
# Jinja2 HTML template for the analysis report (includes the per-SN plot section).
# FIX: the summary block previously referenced ``status_counts.normal`` and
# ``status_counts.abnormal``, but the renderer builds the dict with keys
# "success" / "warning" / "danger", so those counts always rendered empty.
# The template now uses the producer's actual keys.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>测试报告分析 - {{ keyword }}</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            border-radius: 10px;
            margin-bottom: 20px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
        .test-card {
            background: white;
            border-radius: 10px;
            padding: 20px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            transition: transform 0.2s ease;
        }
        .test-card:hover {
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
        }
        .test-header {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 15px;
            padding-bottom: 10px;
            border-bottom: 2px solid #eaeaea;
        }
        .test-title {
            font-size: 18px;
            font-weight: bold;
            color: #333;
        }
        .test-stats {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
            margin-bottom: 15px;
        }
        .stat-item {
            background: #f8f9fa;
            padding: 12px;
            border-radius: 8px;
            text-align: center;
        }
        .stat-label {
            font-size: 12px;
            color: #666;
            margin-bottom: 5px;
        }
        .stat-value {
            font-size: 16px;
            font-weight: bold;
            color: #333;
        }
        .plot-container {
            text-align: center;
            margin: 20px 0;
        }
        .plot-image {
            max-width: 100%;
            height: auto;
            border-radius: 8px;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
        }
        .sn-plots-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
            gap: 20px;
            margin: 20px 0;
        }
        .sn-plot-item {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            text-align: center;
        }
        .sn-plot-title {
            font-size: 14px;
            font-weight: bold;
            margin-bottom: 10px;
            color: #555;
        }
        .summary {
            background: white;
            border-radius: 10px;
            padding: 20px;
            margin-top: 20px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }
        .summary-item {
            margin: 10px 0;
            padding: 10px;
            background: #f8f9fa;
            border-radius: 6px;
        }
        .timestamp {
            text-align: center;
            color: #666;
            margin-top: 30px;
            font-size: 12px;
        }
        .badge {
            padding: 4px 8px;
            border-radius: 12px;
            font-size: 12px;
            font-weight: bold;
        }
        .badge-success {
            background: #d4edda;
            color: #155724;
        }
        .badge-warning {
            background: #fff3cd;
            color: #856404;
        }
        .badge-danger {
            background: #f8d7da;
            color: #721c24;
        }
        .section-title {
            font-size: 16px;
            font-weight: bold;
            margin: 20px 0 10px 0;
            color: #333;
            border-left: 4px solid #667eea;
            padding-left: 10px;
        }
    </style>
</head>
<body>
    <div class="header">
        <h1>📊 测试报告分析</h1>
        <p>关键词: <strong>{{ keyword }}</strong> | 生成时间: {{ timestamp }}</p>
        <p>共分析 {{ test_count }} 个测试项,{{ total_points }} 个数据点</p>
    </div>

    {% for test in tests %}
    <div class="test-card">
        <div class="test-header">
            <div class="test-title">📋 {{ test.name }}</div>
            <div class="badge badge-{{ test.status }}">
                {{ test.status_display }}
            </div>
        </div>

        <div class="test-stats">
            <div class="stat-item">
                <div class="stat-label">数据点数</div>
                <div class="stat-value">{{ test.stats.count }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">平均值</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.mean) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">中位数</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.median) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">标准差</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.std) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">最小值</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.min) }}</div>
            </div>
            <div class="stat-item">
                <div class="stat-label">最大值</div>
                <div class="stat-value">{{ "%.4f"|format(test.stats.max) }}</div>
            </div>
        </div>

        {% if test.limits.lower is not none or test.limits.upper is not none %}
        <div class="test-stats">
            {% if test.limits.lower is not none %}
            <div class="stat-item">
                <div class="stat-label">下限值</div>
                <div class="stat-value">{{ "%.4f"|format(test.limits.lower) }}</div>
            </div>
            {% endif %}
            {% if test.limits.upper is not none %}
            <div class="stat-item">
                <div class="stat-label">上限值</div>
                <div class="stat-value">{{ "%.4f"|format(test.limits.upper) }}</div>
            </div>
            {% endif %}
        </div>
        {% endif %}

        <!-- 汇总图 -->
        <div class="section-title">📈 汇总视图 (所有SN)</div>
        <div class="plot-container">
            <img src="data:image/png;base64,{{ test.summary_plot_image }}" alt="{{ test.name }} 汇总散点图" class="plot-image">
        </div>

        <!-- SN独立图 -->
        {% if test.sn_plot_images %}
        <div class="section-title">🔍 SN独立视图 ({{ test.sn_plot_images|length }}个SN)</div>
        <div class="sn-plots-container">
            {% for sn_plot in test.sn_plot_images %}
            <div class="sn-plot-item">
                <div class="sn-plot-title">SN: {{ sn_plot.sn }}</div>
                <img src="data:image/png;base64,{{ sn_plot.image }}" alt="{{ test.name }} - SN {{ sn_plot.sn }} 散点图" class="plot-image">
            </div>
            {% endfor %}
        </div>
        {% endif %}
    </div>
    {% endfor %}

    <div class="summary">
        <h3>📈 分析摘要</h3>
        <div class="summary-item">
            <strong>文件路径:</strong> {{ file_path }}
        </div>
        <div class="summary-item">
            <strong>分析时间:</strong> {{ analysis_time }}秒
        </div>
        <div class="summary-item">
            <strong>测试项分布:</strong>
            <ul>
                <li>正常: {{ status_counts.success }} 个</li>
                <li>警告: {{ status_counts.warning }} 个</li>
                <li>异常: {{ status_counts.danger }} 个</li>
            </ul>
        </div>
    </div>

    <div class="timestamp">
        报告生成于 {{ timestamp }} | 测试报告分析系统
    </div>
</body>
</html>
"""
|
||||
|
||||
|
||||
class TestReportScatterPlotter:
|
||||
def __init__(self):
|
||||
self.file_path: Optional[str] = None
|
||||
self.df: Optional[pd.DataFrame] = None
|
||||
self.output_dir: Optional[str] = None
|
||||
self.required_columns = ["Test Name New", "SN", "Measurement", "Test Time", "Lower Limit", "Upper Limit", ]
|
||||
self.col_lower: Optional[str] = None
|
||||
self.col_upper: Optional[str] = None
|
||||
self.html_report_path: Optional[str] = None
|
||||
|
||||
# 缓存处理过的数据
|
||||
self._processed_data_cache: Dict[str, Any] = {}
|
||||
|
||||
def _print_stage(self, msg: str) -> None:
|
||||
"""统一的阶段信息输出"""
|
||||
print(f"\n{'=' * 30}\n{msg}\n{'=' * 30}")
|
||||
|
||||
def _print_progress(self, current: int, total: int, prefix: str = "进度") -> None:
|
||||
"""改进的进度条显示"""
|
||||
if total <= 0:
|
||||
return
|
||||
|
||||
percent = (current / total) * 100
|
||||
bar_len = 30
|
||||
filled = int(bar_len * current / total)
|
||||
bar = "█" * filled + "-" * (bar_len - filled)
|
||||
sys.stdout.write(f"\r{prefix}: [{bar}] {current}/{total} ({percent:.1f}%)")
|
||||
sys.stdout.flush()
|
||||
if current == total:
|
||||
print() # 换行
|
||||
|
||||
def get_file_path(self) -> None:
|
||||
"""改进的文件路径获取,支持路径补全"""
|
||||
self._print_stage("输入文件路径")
|
||||
|
||||
while True:
|
||||
print(f"{Fore.WHITE}请输入测试报告文件路径(.xlsx): ")
|
||||
file_path = input("> ").strip()
|
||||
|
||||
# 尝试路径补全和验证
|
||||
if not file_path:
|
||||
continue
|
||||
|
||||
path_obj = Path(file_path)
|
||||
if path_obj.exists():
|
||||
self.file_path = str(path_obj.resolve())
|
||||
print(f"已选择文件: {self.file_path}")
|
||||
break
|
||||
else:
|
||||
print(f"文件不存在: {file_path},请重新输入")
|
||||
|
||||
def _find_column_case_insensitive(self, candidates: List[str]) -> Optional[str]:
|
||||
"""优化的大小写不敏感列查找"""
|
||||
if self.df is None:
|
||||
return None
|
||||
|
||||
columns_lower = {col.lower().strip(): col for col in self.df.columns}
|
||||
for candidate in candidates:
|
||||
key = candidate.lower().strip()
|
||||
if key in columns_lower:
|
||||
return columns_lower[key]
|
||||
return None
|
||||
|
||||
    def load_data(self) -> None:
        """Load the selected Excel workbook into ``self.df``.

        Steps: pick a read engine from the file extension, list the sheet names
        cheaply, locate the first target sheet ("Merged All Tests" then
        "All Tests"), read only the required columns when possible, validate
        them, and detect the lower/upper limit column names.

        Raises:
            FileNotFoundError: ``self.file_path`` does not exist.
            ValueError: not an .xls/.xlsx file, sheet missing, or sheet empty.
            RuntimeError: workbook cannot be opened, or both engines fail.
            KeyError: required columns are missing after loading.
        """
        self._print_stage("加载数据")
        start_time = time.time()

        # Fail fast if the file vanished since selection.
        if not os.path.exists(self.file_path):
            raise FileNotFoundError(f"文件不存在: {self.file_path}")

        # Choose the read engine from the extension.
        # NOTE(review): engine_options is currently unused — only the default
        # engine and the pandas auto-fallback below are exercised.
        file_ext = self.file_path.lower()
        if file_ext.endswith('.xlsx'):
            # .xlsx engine preference (calamine is optional and may be faster).
            engine_options = ['openpyxl', 'calamine']
            engine = 'openpyxl'  # default
        elif file_ext.endswith('.xls'):
            # .xls engine preference.
            engine_options = ['xlrd', 'calamine']
            engine = 'xlrd'  # default
        else:
            raise ValueError("输入文件不是有效的 Excel 文件(应为 .xls 或 .xlsx 格式)")

        # Lightweight sheet-name discovery without loading any cell data.
        try:
            if engine == 'openpyxl':
                import openpyxl
                workbook = openpyxl.load_workbook(self.file_path, read_only=True)
                sheet_names = workbook.sheetnames
                workbook.close()
            elif engine == 'xlrd':
                import xlrd
                workbook = xlrd.open_workbook(self.file_path, on_demand=True)
                sheet_names = workbook.sheet_names()
                workbook.release_resources()
            else:
                # Generic pandas fallback for sheet discovery.
                excel_file = pd.ExcelFile(self.file_path, engine=engine)
                sheet_names = excel_file.sheet_names
        except Exception as e:
            raise RuntimeError(f"无法打开 Excel 文件,请确认该文件未被损坏或占用。错误: {type(e).__name__}: {e}")

        # Prefer the merged sheet, fall back to the plain one.
        target_sheets = ["Merged All Tests", "All Tests"]
        selected_sheet = None

        for sheet in target_sheets:
            if sheet in sheet_names:
                selected_sheet = sheet
                break

        if selected_sheet is None:
            raise ValueError(
                f"未找到指定的工作表: {' 或 '.join(target_sheets)}。"
                f"当前文件包含的工作表有: {sheet_names}"
            )

        try:
            # Read parameters tuned for speed: load everything as objects and
            # skip NA filtering; conversion happens later, per column.
            read_excel_kwargs = {
                # 'filepath_or_buffer': self.file_path,
                'io': self.file_path,  # pandas' read_excel takes 'io', not 'filepath_or_buffer'
                'sheet_name': selected_sheet,
                'engine': engine,
                'dtype': 'object',  # uniform object dtype avoids per-cell type inference
                'na_filter': False,  # skip NA detection for faster reads
            }

            # When the required columns are known, restrict the read to them.
            if hasattr(self, 'required_columns') and self.required_columns:
                try:
                    # Read a single row just to obtain the header.
                    sample_df = pd.read_excel(
                        self.file_path,
                        sheet_name=selected_sheet,
                        engine=engine,
                        nrows=1
                    )
                    existing_columns = [col for col in self.required_columns if col in sample_df.columns]

                    if len(existing_columns) < len(self.required_columns):
                        missing = set(self.required_columns) - set(existing_columns)
                        raise KeyError(f"缺少必要列: {list(missing)}")

                    read_excel_kwargs['usecols'] = existing_columns

                    # Echo the effective read parameters (debugging aid).
                    print("使用 read_excel_kwargs 读取excel:")
                    for key, value in read_excel_kwargs.items():
                        print(f"  {key}: {repr(value)}")  # repr keeps special characters visible

                except Exception as e:
                    # Column pre-check failed: fall back to reading all columns.
                    print(f"列检查失败,将读取所有列: {e}")

            # Perform the actual read.
            self._print_stage("执行数据读取")
            self.df = pd.read_excel(**read_excel_kwargs)

        except Exception as e:
            # Primary engine failed: let pandas auto-select an engine.
            print(f"引擎 {engine} 读取失败,尝试备选引擎...\n{e}")
            try:
                self.df = pd.read_excel(
                    self.file_path,
                    sheet_name=selected_sheet,
                    engine=None  # pandas chooses the engine
                )
            except Exception as fallback_e:
                raise RuntimeError(
                    f"读取 Excel 失败,工作表: '{selected_sheet}'。"
                    f"主引擎错误: {type(e).__name__}: {e}\n"
                    f"备选引擎错误: {type(fallback_e).__name__}: {fallback_e}"
                )

        if self.df.empty:
            raise ValueError("工作表为空,无法处理")

        # Re-validate required columns (needed when usecols was not applied).
        if hasattr(self, 'required_columns') and self.required_columns:
            missing_columns = [col for col in self.required_columns if col not in self.df.columns]
            if missing_columns:
                raise KeyError(f"缺少必要列: {missing_columns}")

        # Detect the limit columns by a list of common aliases.
        self.col_lower = self._find_column_case_insensitive([
            "Lower Limit", "lower limit", "lower_limit", "ll", "lower"
        ])
        self.col_upper = self._find_column_case_insensitive([
            "Upper Limit", "upper limit", "upper_limit", "ul", "upper"
        ])

        loading_time = time.time() - start_time
        print(f"数据加载完成: {len(self.df)} 行 × {self.df.shape[1]} 列")
        print(f"使用引擎: {engine}")
        print(f"耗时: {loading_time:.2f}s")

        # Column detection summary.
        print(f"检测到下限列: {self.col_lower or '无'}")
        print(f"检测到上限列: {self.col_upper or '无'}")

        # Optional dtype conversion (disabled by default).
        # self._convert_data_types()
|
||||
|
||||
# 可以添加这个方法进行类型转换优化
|
||||
def _convert_data_types(self):
|
||||
"""优化数据类型转换"""
|
||||
if self.df is None or self.df.empty:
|
||||
return
|
||||
|
||||
# 根据列名模式推断数据类型
|
||||
numeric_patterns = ['limit', 'value', 'measure', 'result', 'score']
|
||||
date_patterns = ['date', 'time', 'period']
|
||||
|
||||
for col in self.df.columns:
|
||||
col_lower = str(col).lower()
|
||||
|
||||
# 数值类型转换
|
||||
if any(pattern in col_lower for pattern in numeric_patterns):
|
||||
self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
|
||||
# 日期类型转换
|
||||
elif any(pattern in col_lower for pattern in date_patterns):
|
||||
self.df[col] = pd.to_datetime(self.df[col], errors='coerce')
|
||||
|
||||
|
||||
    def get_keyword(self) -> Tuple[pd.DataFrame, str, List[str]]:
        """Prompt for a keyword and filter rows by 'Test Name New'.

        Loops until a non-empty keyword yields matches, the user opts to use
        all data, or the user aborts.

        Returns:
            (filtered_df, keyword, unique_tests) — the filtered frame, the
            keyword actually applied ("" when all data is used), and the list
            of distinct test names in the result. On abort/error paths the
            frame and the test list are empty.

        NOTE(review): the keyword is passed to ``str.contains`` as a regex —
        special characters in user input may surprise; consider regex=False.
        """
        self._print_stage("筛选关键词")

        while True:
            keyword = input("请输入筛选关键词(匹配 'Test Name New'): ").strip()

            if not keyword:
                print("❌ 关键词不能为空,请重新输入")
                continue

            # Guard: nothing to filter.
            if self.df.empty:
                print("⚠️ 数据框为空,无法进行筛选")
                return pd.DataFrame(), keyword, []

            # Guard: filtering column must exist.
            if "Test Name New" not in self.df.columns:
                print("❌ 列 'Test Name New' 不存在于数据框中")
                print(f"可用列: {list(self.df.columns)}")
                return pd.DataFrame(), keyword, []

            try:
                # Case-insensitive substring/regex match; NaN rows never match.
                mask = self.df["Test Name New"].astype(str).str.contains(keyword, case=False, na=False)
                filtered_df = self.df.loc[mask].copy()

                if filtered_df.empty:
                    # No match: show examples and offer retry / all-data / abort.
                    print(f"⚠️ 没有找到包含关键词 '{keyword}' 的测试项")

                    # Show up to five available test names as a hint.
                    available_tests = self.df["Test Name New"].dropna().unique()
                    if len(available_tests) > 0:
                        print("📋 可用的测试项示例:")
                        for test in available_tests[:5]:  # first five only
                            print(f"  - {test}")
                        if len(available_tests) > 5:
                            print(f"  ... 还有 {len(available_tests) - 5} 个测试项")

                    # Let the user decide how to proceed.
                    choice = input("请选择: 1-重新输入关键词 2-使用所有数据 3-退出当前操作: ")
                    if choice == "1":
                        continue
                    elif choice == "2":
                        # Use the whole frame; keyword reported as "".
                        filtered_df = self.df.copy()
                        unique_tests = filtered_df["Test Name New"].unique().tolist()
                        print(f"✅ 使用所有数据: {len(filtered_df)} 行,{len(unique_tests)} 个测试项")
                        return filtered_df, "", unique_tests
                    else:
                        print("👋 退出筛选操作")
                        return pd.DataFrame(), keyword, []
                else:
                    unique_tests = filtered_df["Test Name New"].unique().tolist()
                    print(f"✅ 匹配到 {len(filtered_df)} 行数据,涉及 {len(unique_tests)} 个不同测试项")
                    return filtered_df, keyword, unique_tests

            except Exception as e:
                # e.g. an invalid regex in the keyword — report and re-prompt.
                print(f"❌ 筛选过程中发生错误: {e}")
                print("请检查数据格式或重新输入关键词")
                continue
|
||||
|
||||
def create_output_dir(self, keyword) -> None:
|
||||
"""创建输出目录"""
|
||||
self._print_stage("创建输出目录")
|
||||
|
||||
if not self.file_path:
|
||||
raise ValueError("文件路径未设置")
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
base_dir = os.path.dirname(self.file_path)
|
||||
# self.output_dir = os.path.join(base_dir, f"scatter_report_{timestamp}")
|
||||
self.output_dir = os.path.join(base_dir, f"scatter_report_out")
|
||||
self.html_report_path = os.path.join(self.output_dir, f"{keyword}_report_{timestamp}.html")
|
||||
|
||||
os.makedirs(self.output_dir, exist_ok=True)
|
||||
print(f"输出目录: {self.output_dir}")
|
||||
|
||||
@staticmethod
|
||||
def _safe_filename(name: str) -> str:
|
||||
"""生成安全的文件名"""
|
||||
safe = "".join(c for c in str(name) if c.isalnum() or c in (" ", "_", "-")).strip()
|
||||
return safe or "Unknown_Test"
|
||||
|
||||
def _extract_limits(self, df_one_test: pd.DataFrame) -> Tuple[
|
||||
Optional[float], Optional[float], List[float], List[float]]:
|
||||
"""提取某个测试项的上下限数值"""
|
||||
lower_plot = upper_plot = None
|
||||
lower_set = []
|
||||
upper_set = []
|
||||
|
||||
if self.col_lower and self.col_lower in df_one_test.columns:
|
||||
lower_vals = self._clean_and_convert_series(df_one_test[self.col_lower], 'numeric').dropna().unique()
|
||||
lower_set = sorted(lower_vals.tolist()) if len(lower_vals) > 0 else []
|
||||
if lower_set:
|
||||
lower_plot = min(lower_set)
|
||||
|
||||
if self.col_upper and self.col_upper in df_one_test.columns:
|
||||
upper_vals = self._clean_and_convert_series(df_one_test[self.col_upper], 'numeric').dropna().unique()
|
||||
upper_set = sorted(upper_vals.tolist()) if len(upper_vals) > 0 else []
|
||||
if upper_set:
|
||||
upper_plot = max(upper_set)
|
||||
|
||||
return lower_plot, upper_plot, lower_set, upper_set
|
||||
|
||||
@staticmethod
|
||||
def _clean_and_convert_series(series: pd.Series, target_type: str = 'numeric') -> pd.Series:
|
||||
"""统一的系列清洗和转换方法 - 修复了 ast 方法名错误"""
|
||||
if series.empty:
|
||||
return series
|
||||
|
||||
if target_type == 'numeric':
|
||||
# 数值转换优化
|
||||
if pd.api.types.is_numeric_dtype(series):
|
||||
return series.astype(float)
|
||||
|
||||
# 批量字符串处理 - 修复这里的问题
|
||||
cleaned = series.astype(str).str.replace(r'[, ]', '', regex=True).str.strip()
|
||||
return pd.to_numeric(cleaned, errors='coerce')
|
||||
|
||||
elif target_type == 'datetime':
|
||||
return TestReportScatterPlotter._convert_to_datetime(series)
|
||||
|
||||
return series
|
||||
|
||||
    @staticmethod
    def _convert_to_datetime(series: pd.Series) -> pd.Series:
        """Best-effort conversion of a mixed series to ``datetime64[ns]``.

        Tries, in order: pass-through for already-datetime input, epoch
        milliseconds, epoch seconds, Excel serial day numbers, one known
        'YYYY-MM-DD HH-MM-SS' string format, then pandas' generic parser.
        Entries that survive no stage stay NaT.
        """
        if pd.api.types.is_datetime64_any_dtype(series):
            return series

        # Two views of the input: numeric (for timestamps) and string (for dates).
        numeric_series = pd.to_numeric(series, errors='coerce')
        string_series = series.astype(str).str.strip()

        result = pd.Series(pd.NaT, index=series.index, dtype='datetime64[ns]')

        # Numeric timestamp heuristics keyed by magnitude:
        #   >= 1e11        -> epoch milliseconds
        #   [1e9, 1e11)    -> epoch seconds
        #   (20000, 60000) -> Excel serial dates (roughly 1954..2064)
        masks = {
            'ms': numeric_series >= 1e11,
            's': (numeric_series >= 1e9) & (numeric_series < 1e11),
            'excel': (numeric_series > 20000) & (numeric_series < 60000)
        }

        for mask_type, mask in masks.items():
            if mask.any():
                if mask_type == 'ms':
                    result.loc[mask] = pd.to_datetime(numeric_series.loc[mask], unit='ms')
                elif mask_type == 's':
                    result.loc[mask] = pd.to_datetime(numeric_series.loc[mask], unit='s')
                elif mask_type == 'excel':
                    # Excel's day zero (accounts for the 1900 leap-year quirk).
                    origin = pd.Timestamp('1899-12-30')
                    result.loc[mask] = origin + pd.to_timedelta(numeric_series.loc[mask], unit='D')

        # String-date handling for whatever is still NaT.
        remaining_mask = result.isna()
        if remaining_mask.any():
            remaining_strings = string_series.loc[remaining_mask]

            # Known exotic format(s), tried before the generic parser.
            format_patterns = [
                (r'^\d{4}-\d{2}-\d{2} \d{2}-\d{2}-\d{2}$', '%Y-%m-%d %H-%M-%S'),
            ]

            for pattern, date_format in format_patterns:
                format_mask = remaining_strings.str.match(pattern)
                if format_mask.any():
                    # Translate the boolean mask back to the original index
                    # positions before assigning into `result`.
                    result.loc[remaining_mask[remaining_mask].index[format_mask]] = pd.to_datetime(
                        remaining_strings.loc[format_mask], format=date_format, errors='coerce'
                    )

            # Generic fallback parse for anything still unresolved.
            still_na_mask = result.isna() & remaining_mask
            if still_na_mask.any():
                result.loc[still_na_mask] = pd.to_datetime(
                    string_series.loc[still_na_mask], errors='coerce'
                )

        return result
|
||||
|
||||
def _preprocess_test_data(self, test_data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""数据预处理"""
|
||||
# 数值转换
|
||||
test_data['Measurement_num'] = self._clean_and_convert_series(
|
||||
test_data['Measurement'], 'numeric'
|
||||
)
|
||||
test_data['TestTime_dt'] = self._clean_and_convert_series(
|
||||
test_data['Test Time'], 'datetime'
|
||||
)
|
||||
|
||||
# 去除无效数据
|
||||
valid_data = test_data.dropna(subset=['Measurement_num', 'TestTime_dt'])
|
||||
return valid_data.sort_values('TestTime_dt')
|
||||
|
||||
def _calculate_statistics(self, y_data: pd.Series) -> Dict[str, float]:
|
||||
"""计算统计信息"""
|
||||
stats = {
|
||||
'count': len(y_data),
|
||||
'mean': y_data.mean(),
|
||||
'median': y_data.median(),
|
||||
'min': y_data.min(),
|
||||
'max': y_data.max(),
|
||||
'std': y_data.std(),
|
||||
'q1': y_data.quantile(0.25),
|
||||
'q3': y_data.quantile(0.75)
|
||||
}
|
||||
return stats
|
||||
|
||||
def _plot_to_base64(self, fig) -> str:
|
||||
"""将图表转换为base64编码"""
|
||||
buf = BytesIO()
|
||||
fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
|
||||
buf.seek(0)
|
||||
img_str = base64.b64encode(buf.read()).decode('utf-8')
|
||||
plt.close(fig)
|
||||
return img_str
|
||||
|
||||
    def _create_summary_plot(self, test_data: pd.DataFrame, test_name: str,
                             lower_plot: Optional[float], upper_plot: Optional[float]) -> str:
        """Scatter all SNs of one test on a shared time axis; return base64 PNG.

        Draws one scatter series per SN, dashed limit lines when provided, and
        mean/median reference lines spanning the observed time range.
        """
        fig, ax = plt.subplots(figsize=(12, 8))

        # One scatter series per SN; a single anonymous group when no SN column exists.
        groups = list(test_data.groupby("SN")) if "SN" in test_data.columns else [("Unknown_SN", test_data)]
        for sn, group in groups:
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       label=str(sn), alpha=0.7, s=25)

        # Overall statistics drive the reference lines below.
        y_data = test_data['Measurement_num']
        stats = self._calculate_statistics(y_data)

        # Time extent used for the mean/median segments.
        x_min, x_max = test_data['TestTime_dt'].min(), test_data['TestTime_dt'].max()

        if lower_plot is not None:
            ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=1.2, label="Lower Limit")
        if upper_plot is not None:
            ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=1.2, label="Upper Limit")

        # Mean/median reference lines across the full time range.
        ax.hlines(y=stats['mean'], xmin=x_min, xmax=x_max, colors='orange',
                  linestyles='-', linewidth=1.5, alpha=0.7, label='Mean')
        ax.hlines(y=stats['median'], xmin=x_min, xmax=x_max, colors='purple',
                  linestyles='-.', linewidth=1.5, alpha=0.7, label='Median')

        # Axis and cosmetic setup.
        ax.set_title(f"汇总图 - {test_name}")
        ax.set_xlabel("Test Time")
        ax.set_ylabel("Measurement Value")
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='x', rotation=45)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

        return self._plot_to_base64(fig)
|
||||
|
||||
    def _create_sn_plots(self, test_data: pd.DataFrame, test_name: str,
                         lower_plot: Optional[float], upper_plot: Optional[float]) -> List[Dict[str, str]]:
        """Render one scatter plot per SN.

        Returns a list of ``{"sn": ..., "image": ...}`` dicts (base64 PNGs),
        empty when the data has no 'SN' column.
        """
        sn_plots = []

        # Without an SN column there is nothing to split on.
        if "SN" not in test_data.columns:
            return sn_plots

        sn_groups = test_data.groupby("SN")

        for sn, group in sn_groups:
            if group.empty:
                continue

            fig, ax = plt.subplots(figsize=(10, 6))

            # Scatter of this SN's measurements over time.
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       color='blue', alpha=0.7, s=30, label=f"SN: {sn}")

            # Per-SN statistics drive the reference lines below.
            y_data = group['Measurement_num']
            stats = self._calculate_statistics(y_data)

            # Time extent of this SN's data for the mean/median segments.
            x_min, x_max = group['TestTime_dt'].min(), group['TestTime_dt'].max()

            if lower_plot is not None:
                ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=1.2, label="Lower Limit")
            if upper_plot is not None:
                ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=1.2, label="Upper Limit")

            # Per-SN mean/median reference lines.
            ax.hlines(y=stats['mean'], xmin=x_min, xmax=x_max, colors='orange',
                      linestyles='-', linewidth=1.5, alpha=0.7, label='Mean')
            ax.hlines(y=stats['median'], xmin=x_min, xmax=x_max, colors='purple',
                      linestyles='-.', linewidth=1.5, alpha=0.7, label='Median')

            # Axis and cosmetic setup.
            ax.set_title(f"SN独立图 - {test_name} (SN: {sn})")
            ax.set_xlabel("Test Time")
            ax.set_ylabel("Measurement Value")
            ax.grid(True, alpha=0.3)
            ax.tick_params(axis='x', rotation=45)
            ax.legend()

            # Encode the figure and collect it for the HTML report.
            plot_image = self._plot_to_base64(fig)
            sn_plots.append({"sn": str(sn), "image": plot_image})

        return sn_plots
|
||||
|
||||
def _determine_test_status(self, stats: Dict[str, float],
|
||||
lower_limit: Optional[float],
|
||||
upper_limit: Optional[float]) -> Dict[str, Any]:
|
||||
"""确定测试状态"""
|
||||
status = "success"
|
||||
status_display = "正常"
|
||||
|
||||
if lower_limit is not None and upper_limit is not None:
|
||||
# 检查是否超出限值
|
||||
if stats['min'] < lower_limit or stats['max'] > upper_limit:
|
||||
status = "danger"
|
||||
status_display = "异常"
|
||||
elif (stats['mean'] < lower_limit * 1.1 or stats['mean'] > upper_limit * 0.9 or
|
||||
stats['std'] > (upper_limit - lower_limit) * 0.2):
|
||||
status = "warning"
|
||||
status_display = "警告"
|
||||
|
||||
return {"status": status, "status_display": status_display}
|
||||
|
||||
def generate_html_report(self, filtered_df: pd.DataFrame, keyword: str,
                         unique_tests: List[str]) -> None:
    """Render a single HTML report covering every matched test.

    For each test: preprocess the rows, compute statistics, build a
    summary plot plus per-SN plots (base64-embedded), classify the
    status, then render HTML_TEMPLATE to self.html_report_path.
    """
    self._print_stage("生成HTML报告")
    start_time = time.time()

    test_results = []
    total_points = 0
    status_counts = {"success": 0, "warning": 0, "danger": 0}

    for i, test_name in enumerate(unique_tests, 1):
        self._print_progress(i, len(unique_tests), "生成测试报告")

        # Slice and normalize this test's rows.
        test_data = filtered_df[filtered_df["Test Name New"] == test_name].copy()
        test_data = self._preprocess_test_data(test_data)

        if test_data.empty:
            continue

        # Only the plotting limits are needed here; distinct-value sets ignored.
        lower_plot, upper_plot, _, _ = self._extract_limits(test_data)

        y_data = test_data['Measurement_num']
        stats = self._calculate_statistics(y_data)
        total_points += stats['count']

        # Embedded images: one combined chart plus one chart per SN.
        summary_plot_image = self._create_summary_plot(test_data, test_name, lower_plot, upper_plot)

        sn_plot_images = self._create_sn_plots(test_data, test_name, lower_plot, upper_plot)

        status_info = self._determine_test_status(stats, lower_plot, upper_plot)
        status_counts[status_info["status"]] += 1

        test_results.append({
            "name": test_name,
            "stats": stats,
            "limits": {"lower": lower_plot, "upper": upper_plot},
            "summary_plot_image": summary_plot_image,
            "sn_plot_images": sn_plot_images,
            "status": status_info["status"],
            "status_display": status_info["status_display"]
        })

    # Render the Jinja2 template with everything collected above.
    template = Template(HTML_TEMPLATE)
    html_content = template.render(
        keyword=keyword,
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        test_count=len(test_results),
        total_points=total_points,
        tests=test_results,
        file_path=self.file_path,
        analysis_time=round(time.time() - start_time, 2),
        status_counts={"normal": status_counts["success"], "warning": status_counts["warning"],
                       "abnormal": status_counts["danger"]}
    )

    # Write the report next to the other outputs.
    with open(self.html_report_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"\nHTML报告已生成: {self.html_report_path}")
    print(f"共处理 {len(test_results)} 个测试项,{total_points} 个数据点")
|
||||
|
||||
def run(self) -> None:
    """Interactive entry point: load the report once, then analyze
    keyword after keyword in a loop.

    NOTE(review): the inner `while True` has no break — the user exits
    with Ctrl-C, which is caught below; confirm this is intentional.
    """
    try:
        self.get_file_path()
        self.load_data()
        while True:
            filtered_df, keyword, unique_tests = self.get_keyword()
            self.create_output_dir(keyword)
            self.generate_html_report(filtered_df, keyword, unique_tests)
            print(f"\n✅ 分析完成!")
    except KeyboardInterrupt:
        # Normal way to leave the keyword loop.
        print(f"\n{Fore.YELLOW}⚠ 用户中断程序")
    except Exception as e:
        # Anything else: show a traceback and exit non-zero.
        print(f"\n❌ 发生错误: {type(e).__name__}: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point: build the plotter and run the interactive workflow.
if __name__ == "__main__":
    plotter = TestReportScatterPlotter()
    plotter.run()
|
||||
563
htmlProcess/htmlReportProcess_Merge_pic_V1.py
Normal file
563
htmlProcess/htmlReportProcess_Merge_pic_V1.py
Normal file
@@ -0,0 +1,563 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
from matplotlib.lines import Line2D
|
||||
from typing import Optional, Tuple, List, Dict, Any, Union
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
|
||||
from colorama import Fore, Style, init
|
||||
|
||||
# 避免 SettingWithCopy 警告影响输出可读性
|
||||
pd.options.mode.chained_assignment = None
|
||||
|
||||
# 设置中文字体支持
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans', 'Arial Unicode MS', 'Microsoft YaHei']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
|
||||
class TestReportScatterPlotter:
    """Interactive tool: load a merged Excel test report, filter tests by
    keyword, and save per-test scatter plots of measurements over time."""

    def __init__(self):
        # Path of the selected .xlsx report (set by get_file_path).
        self.file_path: Optional[str] = None
        # Full report sheet loaded by load_data.
        self.df: Optional[pd.DataFrame] = None
        # Folder where generated PNGs are written.
        self.output_dir: Optional[str] = None
        # Columns the report sheet must contain.
        self.required_columns = ["Test Name New", "SN", "Measurement", "Test Time"]
        # Detected lower/upper-limit column names (None when absent).
        self.col_lower: Optional[str] = None
        self.col_upper: Optional[str] = None

        # Cache of preprocessed per-test DataFrames (keyed in plot_scatter).
        self._processed_data_cache: Dict[str, Any] = {}
|
||||
|
||||
def _print_stage(self, msg: str) -> None:
|
||||
"""统一的阶段信息输出"""
|
||||
print(f"\n{'=' * 30}\n{msg}\n{'=' * 30}")
|
||||
|
||||
def _print_progress(self, current: int, total: int, prefix: str = "进度") -> None:
|
||||
"""改进的进度条显示"""
|
||||
if total <= 0:
|
||||
return
|
||||
|
||||
percent = (current / total) * 100
|
||||
bar_len = 30
|
||||
filled = int(bar_len * current / total)
|
||||
bar = "█" * filled + "-" * (bar_len - filled)
|
||||
sys.stdout.write(f"\r{prefix}: [{bar}] {current}/{total} ({percent:.1f}%)")
|
||||
sys.stdout.flush()
|
||||
if current == total:
|
||||
print() # 换行
|
||||
|
||||
def get_file_path(self) -> None:
    """Prompt until an existing path is entered; store its resolved form."""
    self._print_stage("输入文件路径")

    while True:
        print(f"{Fore.WHITE}请输入测试报告文件路径(.xlsx): ")
        file_path = input("> ").strip()

        # Empty input: just re-prompt.
        if not file_path:
            continue

        path_obj = Path(file_path)
        if path_obj.exists():
            # Normalize to an absolute path for later dirname/output use.
            self.file_path = str(path_obj.resolve())
            print(f"已选择文件: {self.file_path}")
            break
        else:
            print(f"文件不存在: {file_path},请重新输入")
|
||||
|
||||
def _find_column_case_insensitive(self, candidates: List[str]) -> Optional[str]:
|
||||
"""优化的大小写不敏感列查找"""
|
||||
if self.df is None:
|
||||
return None
|
||||
|
||||
columns_lower = {col.lower().strip(): col for col in self.df.columns}
|
||||
for candidate in candidates:
|
||||
key = candidate.lower().strip()
|
||||
if key in columns_lower:
|
||||
return columns_lower[key]
|
||||
return None
|
||||
|
||||
def load_data(self) -> None:
    """Load the report sheet ("Merged All Tests" preferred, else
    "All Tests"), validate required columns, and detect the optional
    limit columns.

    Raises FileNotFoundError / ValueError / RuntimeError / KeyError on
    the corresponding failure; on success sets self.df, self.col_lower
    and self.col_upper.
    """
    self._print_stage("加载数据")
    start_time = time.time()

    # Fail fast on a bad path before involving openpyxl.
    if not os.path.exists(self.file_path):
        raise FileNotFoundError(f"文件不存在: {self.file_path}")

    if not self.file_path.lower().endswith(('.xls', '.xlsx')):
        raise ValueError("输入文件不是有效的 Excel 文件(应为 .xls 或 .xlsx 格式)")

    try:
        # List the sheet names first so the preferred one can be chosen.
        excel_file = pd.ExcelFile(self.file_path, engine='openpyxl')
        sheet_names = excel_file.sheet_names
    except Exception as e:
        raise RuntimeError(f"无法打开 Excel 文件,请确认该文件未被损坏或占用。错误: {type(e).__name__}: {e}")

    # Preferred worksheet names, in priority order.
    target_sheets = ["Merged All Tests", "All Tests"]
    selected_sheet = None

    for sheet in target_sheets:
        if sheet in sheet_names:
            selected_sheet = sheet
            break

    if selected_sheet is None:
        raise ValueError(
            f"未找到指定的工作表: {' 或 '.join(target_sheets)}。"
            f"当前文件包含的工作表有: {sheet_names}"
        )

    try:
        self.df = pd.read_excel(
            self.file_path,
            sheet_name=selected_sheet,
            engine='openpyxl'
        )
    except Exception as e:
        raise RuntimeError(
            f"读取 Excel 失败,工作表: '{selected_sheet}'。错误: {type(e).__name__}: {e}"
        )

    if self.df.empty:
        raise ValueError("工作表为空,无法处理")

    # All required columns must exist before any plotting can work.
    missing_columns = [col for col in self.required_columns if col not in self.df.columns]
    if missing_columns:
        raise KeyError(f"缺少必要列: {missing_columns}")

    # Limit columns are optional; remember whichever spelling is present.
    self.col_lower = self._find_column_case_insensitive([
        "Lower Limit", "lower limit", "lower_limit", "ll", "lower"
    ])
    self.col_upper = self._find_column_case_insensitive([
        "Upper Limit", "upper limit", "upper_limit", "ul", "upper"
    ])

    loading_time = time.time() - start_time
    print(f"数据加载完成: {len(self.df)} 行 × {self.df.shape[1]} 列")
    print(f"耗时: {loading_time:.2f}s")

    # Column-detection summary for the user.
    print(f"检测到下限列: {self.col_lower or '无'}")
    print(f"检测到上限列: {self.col_upper or '无'}")
|
||||
|
||||
def get_keyword(self) -> Tuple[pd.DataFrame, str, List[str]]:
    """Prompt for a keyword; return (matching rows, keyword, test names).

    Matching is a case-insensitive substring search on 'Test Name New'.
    Raises ValueError when nothing matches.
    """
    self._print_stage("筛选关键词")

    while True:
        keyword = input("请输入筛选关键词(匹配 'Test Name New'): ").strip()
        if not keyword:
            print("关键词不能为空,请重新输入")
            continue
        break

    # na=False keeps NaN test names out of the match.
    mask = self.df["Test Name New"].astype(str).str.contains(keyword, case=False, na=False)
    filtered_df = self.df.loc[mask].copy()

    if filtered_df.empty:
        raise ValueError(f"没有找到包含关键词 '{keyword}' 的测试项")

    unique_tests = filtered_df["Test Name New"].unique().tolist()
    print(f"匹配到 {len(filtered_df)} 行数据,涉及 {len(unique_tests)} 个不同测试项")
    return filtered_df, keyword, unique_tests
|
||||
|
||||
def create_output_dir(self) -> None:
    """Create a timestamped scatter_plots_* folder beside the input file.

    Raises ValueError if get_file_path has not run yet.
    """
    self._print_stage("创建输出目录")

    if not self.file_path:
        raise ValueError("文件路径未设置")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_dir = os.path.dirname(self.file_path)
    self.output_dir = os.path.join(base_dir, f"scatter_plots_{timestamp}")

    # exist_ok: harmless if two runs land in the same second.
    os.makedirs(self.output_dir, exist_ok=True)
    print(f"输出目录: {self.output_dir}")
|
||||
|
||||
@staticmethod
|
||||
def _safe_filename(name: str) -> str:
|
||||
"""生成安全的文件名"""
|
||||
safe = "".join(c for c in str(name) if c.isalnum() or c in (" ", "_", "-")).strip()
|
||||
return safe or "Unknown_Test"
|
||||
|
||||
def _extract_limits(self, df_one_test: pd.DataFrame) -> Tuple[
    Optional[float], Optional[float], List[float], List[float]]:
    """Extract the limit values for one test's rows.

    Returns (lower_plot, upper_plot, lower_set, upper_set): the single
    value used for plotting (min of lowers / max of uppers — i.e. the
    widest band) plus the sorted sets of distinct values encountered.
    """
    lower_plot = upper_plot = None
    lower_set = []
    upper_set = []

    if self.col_lower and self.col_lower in df_one_test.columns:
        lower_vals = self._clean_and_convert_series(df_one_test[self.col_lower], 'numeric').dropna().unique()
        lower_set = sorted(lower_vals.tolist()) if len(lower_vals) > 0 else []
        if lower_set:
            # Several distinct lower limits may coexist; plot the loosest.
            lower_plot = min(lower_set)

    if self.col_upper and self.col_upper in df_one_test.columns:
        upper_vals = self._clean_and_convert_series(df_one_test[self.col_upper], 'numeric').dropna().unique()
        upper_set = sorted(upper_vals.tolist()) if len(upper_vals) > 0 else []
        if upper_set:
            upper_plot = max(upper_set)

    return lower_plot, upper_plot, lower_set, upper_set
|
||||
|
||||
@staticmethod
|
||||
def _clean_and_convert_series(series: pd.Series, target_type: str = 'numeric') -> pd.Series:
|
||||
"""统一的系列清洗和转换方法"""
|
||||
if series.empty:
|
||||
return series
|
||||
|
||||
if target_type == 'numeric':
|
||||
# 数值转换优化
|
||||
if pd.api.types.is_numeric_dtype(series):
|
||||
return series.astype(float)
|
||||
|
||||
# 批量字符串处理
|
||||
cleaned = series.astype(str).str.replace(r'[, ]', '', regex=True).str.strip()
|
||||
return pd.to_numeric(cleaned, errors='coerce')
|
||||
|
||||
elif target_type == 'datetime':
|
||||
return TestReportScatterPlotter._convert_to_datetime(series)
|
||||
|
||||
return series
|
||||
|
||||
@staticmethod
def _convert_to_datetime(series: pd.Series) -> pd.Series:
    """Best-effort conversion of a mixed column to datetime64.

    Handles, in order: epoch milliseconds (>= 1e11), epoch seconds
    ([1e9, 1e11)), Excel serial day numbers ((20000, 60000)), the
    explicit 'YYYY-MM-DD HH-MM-SS' format, then a generic pandas parse.
    Unparseable entries remain NaT.
    """
    if pd.api.types.is_datetime64_any_dtype(series):
        return series

    # Two views of the input: numeric (timestamps) and text (date strings).
    numeric_series = pd.to_numeric(series, errors='coerce')
    string_series = series.astype(str).str.strip()

    result = pd.Series(pd.NaT, index=series.index, dtype='datetime64[ns]')

    # Numeric timestamps, disambiguated purely by magnitude.
    masks = {
        'ms': numeric_series >= 1e11,
        's': (numeric_series >= 1e9) & (numeric_series < 1e11),
        'excel': (numeric_series > 20000) & (numeric_series < 60000)
    }

    for mask_type, mask in masks.items():
        if mask.any():
            if mask_type == 'ms':
                result.loc[mask] = pd.to_datetime(numeric_series.loc[mask], unit='ms')
            elif mask_type == 's':
                result.loc[mask] = pd.to_datetime(numeric_series.loc[mask], unit='s')
            elif mask_type == 'excel':
                # Excel's day zero (accounts for the 1900 leap-year quirk).
                origin = pd.Timestamp('1899-12-30')
                result.loc[mask] = origin + pd.to_timedelta(numeric_series.loc[mask], unit='D')

    # String dates for whatever is still NaT.
    remaining_mask = result.isna()
    if remaining_mask.any():
        remaining_strings = string_series.loc[remaining_mask]

        # Known odd format first: time separated by dashes, not colons.
        format_patterns = [
            (r'^\d{4}-\d{2}-\d{2} \d{2}-\d{2}-\d{2}$', '%Y-%m-%d %H-%M-%S'),
        ]

        for pattern, date_format in format_patterns:
            format_mask = remaining_strings.str.match(pattern)
            if format_mask.any():
                # NOTE(review): the boolean indexing of the index here
                # assumes format_mask aligns positionally with the
                # remaining rows — verify with duplicate/odd indexes.
                result.loc[remaining_mask[remaining_mask].index[format_mask]] = pd.to_datetime(
                    remaining_strings.loc[format_mask], format=date_format, errors='coerce'
                )

        # Fallback: let pandas guess the format.
        still_na_mask = result.isna() & remaining_mask
        if still_na_mask.any():
            result.loc[still_na_mask] = pd.to_datetime(
                string_series.loc[still_na_mask], errors='coerce'
            )

    return result
|
||||
|
||||
def _preprocess_test_data(self, test_data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""数据预处理"""
|
||||
# 数值转换
|
||||
test_data['Measurement_num'] = self._clean_and_convert_series(
|
||||
test_data['Measurement'], 'numeric'
|
||||
)
|
||||
test_data['TestTime_dt'] = self._clean_and_convert_series(
|
||||
test_data['Test Time'], 'datetime'
|
||||
)
|
||||
|
||||
# 去除无效数据
|
||||
valid_data = test_data.dropna(subset=['Measurement_num', 'TestTime_dt'])
|
||||
return valid_data.sort_values('TestTime_dt')
|
||||
|
||||
def _calculate_statistics(self, y_data: pd.Series) -> Dict[str, float]:
|
||||
"""计算统计信息"""
|
||||
stats = {
|
||||
'count': len(y_data),
|
||||
'mean': y_data.mean(),
|
||||
'median': y_data.median(),
|
||||
'min': y_data.min(),
|
||||
'max': y_data.max(),
|
||||
'std': y_data.std(),
|
||||
'q1': y_data.quantile(0.25),
|
||||
'q3': y_data.quantile(0.75)
|
||||
}
|
||||
return stats
|
||||
|
||||
def _add_statistics_textbox(self, ax, stats: Dict[str, float],
                            x_pos: float = 1.02, y_pos: float = 0.98) -> None:
    """Draw a monospace statistics box just outside the axes' top-right.

    x_pos/y_pos are axes coordinates; values > 1.0 place the box outside
    the plotting area.
    """
    # English labels avoid missing-glyph warnings with non-CJK fonts.
    stats_text = (
        f"Count: {stats['count']}\n"
        f"Mean: {stats['mean']:.4f}\n"
        f"Median: {stats['median']:.4f}\n"
        f"Min: {stats['min']:.4f}\n"
        f"Max: {stats['max']:.4f}\n"
        f"Std: {stats['std']:.4f}\n"
        f"Q1: {stats['q1']:.4f}\n"
        f"Q3: {stats['q3']:.4f}"
    )

    # Rounded wheat box; left-aligned monospace keeps digit columns tidy.
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
    ax.text(x_pos, y_pos, stats_text, transform=ax.transAxes, fontsize=8,
            verticalalignment='top', horizontalalignment='left',
            bbox=props, fontfamily='monospace')
|
||||
|
||||
def _add_statistics_lines(self, ax, stats: Dict[str, float],
                          x_min: float, x_max: float) -> None:
    """Overlay mean/median/quartile reference lines across [x_min, x_max]."""
    # Mean: solid orange.
    ax.hlines(y=stats['mean'], xmin=x_min, xmax=x_max,
              colors='orange', linestyles='-', linewidth=1.5, alpha=0.7, label='Mean')

    # Median: dash-dot purple.
    ax.hlines(y=stats['median'], xmin=x_min, xmax=x_max,
              colors='purple', linestyles='-.', linewidth=1.5, alpha=0.7, label='Median')

    # Quartiles: dotted gray.
    ax.hlines(y=stats['q1'], xmin=x_min, xmax=x_max,
              colors='gray', linestyles=':', linewidth=1.0, alpha=0.7, label='Q1')
    ax.hlines(y=stats['q3'], xmin=x_min, xmax=x_max,
              colors='gray', linestyles=':', linewidth=1.0, alpha=0.7, label='Q3')
|
||||
|
||||
def _configure_plot(self, ax, test_data: pd.DataFrame, test_name: str,
                    lower_plot: Optional[float], upper_plot: Optional[float]) -> None:
    """Apply limit lines, statistics overlays, labels and the legend to
    one fully-drawn axes."""
    # Summary statistics drive both the overlay lines and the title.
    y_data = test_data['Measurement_num']
    stats = self._calculate_statistics(y_data)

    # Horizontal extent for the hlines overlays.
    x_min = test_data['TestTime_dt'].min()
    x_max = test_data['TestTime_dt'].max()

    # Candidate extremes for the y-range (data plus any limits).
    y_min, y_max = y_data.min(), y_data.max()
    y_candidates = [y_min, y_max]

    # Limit lines; proxy Line2D handles are kept for the legend because
    # the axhline entries are added without labels.
    custom_lines = []
    if lower_plot is not None:
        y_candidates.append(lower_plot)
        ax.axhline(y=lower_plot, color='green', linestyle='--', linewidth=1.2)
        custom_lines.append(Line2D([0], [0], color='green', linestyle='--', label="Lower Limit"))

    if upper_plot is not None:
        y_candidates.append(upper_plot)
        ax.axhline(y=upper_plot, color='red', linestyle='--', linewidth=1.2)
        custom_lines.append(Line2D([0], [0], color='red', linestyle='--', label="Upper Limit"))

    self._add_statistics_lines(ax, stats, x_min, x_max)

    # Y-range with 10% padding; degenerate (flat) data gets a synthetic span.
    valid_candidates = [y for y in y_candidates if pd.notna(y)]
    if valid_candidates:
        y_min_plot = min(valid_candidates)
        y_max_plot = max(valid_candidates)
        y_range = y_max_plot - y_min_plot
        if y_range == 0:
            y_range = abs(y_max_plot) * 0.1 if y_max_plot != 0 else 1.0
            y_min_plot = y_min_plot - y_range / 2
            y_max_plot = y_max_plot + y_range / 2
        ax.set_ylim(y_min_plot - 0.1 * y_range, y_max_plot + 0.1 * y_range)

    # Statistics box sits outside the axes to the right.
    self._add_statistics_textbox(ax, stats)

    # English title/labels avoid CJK glyph issues in the saved PNGs.
    ax.set_title(f"Scatter Plot - {test_name}\n"
                 f"Mean: {stats['mean']:.4f}, Median: {stats['median']:.4f}, "
                 f"Range: [{stats['min']:.4f}, {stats['max']:.4f}]",
                 fontsize=10)
    ax.set_xlabel("Test Time")
    ax.set_ylabel("Measurement Value")
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

    # Merge per-SN scatter handles with the limit-line proxies.
    handles, labels = ax.get_legend_handles_labels()
    if custom_lines:
        handles.extend(custom_lines)
        labels.extend([line.get_label() for line in custom_lines])

    if handles:
        # Two columns when the legend gets crowded; always outside the axes.
        if len(handles) > 10:
            ncol = 2
            ax.legend(handles=handles, labels=labels, title="Legend",
                      fontsize=8, loc='center left', bbox_to_anchor=(1.05, 0.5),
                      ncol=ncol, frameon=True, fancybox=True, shadow=True)
        else:
            ax.legend(handles=handles, labels=labels, title="Legend",
                      fontsize=8, loc='center left', bbox_to_anchor=(1.05, 0.5),
                      frameon=True, fancybox=True, shadow=True)
|
||||
|
||||
def _save_plot(self, fig, test_name: str) -> None:
    """Save `fig` as a timestamped PNG under output_dir, then close it."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_name = self._safe_filename(test_name)
    filename = f"{safe_name}_{timestamp}.png"
    output_path = os.path.join(self.output_dir, filename)

    # bbox_inches='tight' keeps the outside legend/stats box in frame.
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)  # free figure memory during long batch runs
    print(f"已保存: {output_path}")
|
||||
|
||||
def plot_scatter(self, filtered_df: pd.DataFrame, unique_tests: List[str]) -> None:
    """Draw and save one scatter chart (all SNs overlaid) per test."""
    self._print_stage("生成散点图")
    total_tests = len(unique_tests)
    start_time = time.time()

    for i, test_name in enumerate(unique_tests, 1):
        self._print_progress(i, total_tests, "测试项绘图")

        # Memoize preprocessing so repeated passes over a test are cheap.
        cache_key = f"test_{hash(test_name)}"
        if cache_key in self._processed_data_cache:
            test_data = self._processed_data_cache[cache_key]
        else:
            test_data = filtered_df[filtered_df["Test Name New"] == test_name].copy()
            test_data = self._preprocess_test_data(test_data)
            self._processed_data_cache[cache_key] = test_data

        if test_data.empty:
            print(f"\n跳过 '{test_name}' - 无有效的 Measurement/Test Time 数据")
            continue

        lower_plot, upper_plot, lower_set, upper_set = self._extract_limits(test_data)

        # Console summary of which limits were found and which are used.
        limit_info = []
        if lower_set:
            limit_info.append(f"Lower unique={len(lower_set)}, used={lower_plot}")
        else:
            limit_info.append("Lower N/A")
        if upper_set:
            limit_info.append(f"Upper unique={len(upper_set)}, used={upper_plot}")
        else:
            limit_info.append("Upper N/A")

        y_data = test_data['Measurement_num']
        stats = self._calculate_statistics(y_data)
        stat_info = (
            f"数据点: {stats['count']}, "
            f"均值: {stats['mean']:.4f}, "
            f"中位数: {stats['median']:.4f}, "
            f"范围: [{stats['min']:.4f}, {stats['max']:.4f}]"
        )

        print(f"\n→ 绘制: '{test_name}' | {stat_info} | 限值: {', '.join(limit_info)}")

        # Figure grows with SN count so the outside legend still fits.
        sn_count = len(test_data["SN"].unique()) if "SN" in test_data.columns else 1

        base_width = 14
        base_height = 9

        if sn_count > 5:
            fig_width = base_width + min(sn_count / 5, 6)  # cap extra width
        else:
            fig_width = base_width

        fig, ax = plt.subplots(figsize=(fig_width, base_height))

        # One scatter series per SN (one anonymous series when SN absent).
        groups = list(test_data.groupby("SN")) if "SN" in test_data.columns else [("Unknown_SN", test_data)]

        for j, (sn, group) in enumerate(groups, 1):
            ax.scatter(group['TestTime_dt'], group['Measurement_num'],
                       label=str(sn), alpha=0.7, s=25)
            # Progress update every 10 SNs and at the end.
            if j % 10 == 0 or j == len(groups):
                self._print_progress(j, len(groups), "SN分组绘制")

        self._configure_plot(ax, test_data, test_name, lower_plot, upper_plot)

        # Leave room on the right for the stats box and legend.
        plt.tight_layout()
        plt.subplots_adjust(right=0.8 if sn_count <= 10 else 0.7)

        self._save_plot(fig, test_name)

    total_time = time.time() - start_time
    print(f"\n全部绘图完成,总耗时: {total_time:.2f}s")
    print(f"所有图表已保存到: {self.output_dir}")
|
||||
|
||||
def run(self) -> None:
    """Entry point: prompt for the file, load, filter, plot.

    Any failure prints a traceback and exits with status 1.
    """
    try:
        self.get_file_path()
        self.load_data()
        filtered_df, keyword, unique_tests = self.get_keyword()
        self.create_output_dir()
        self.plot_scatter(filtered_df, unique_tests)

    except Exception as e:
        print(f"\n❌ 发生错误: {type(e).__name__}: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point: build the plotter and run the full workflow.
if __name__ == "__main__":
    plotter = TestReportScatterPlotter()
    plotter.run()
|
||||
251
main.py
Normal file
251
main.py
Normal file
@@ -0,0 +1,251 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
from datetime import datetime
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
class BOMConsolidator:
    """Merge multiple Excel BOM files into one consolidated quantity
    report, flagging inconsistent part attributes along the way."""

    def __init__(self):
        # Material key -> merged record (attributes, per-file qty, issues).
        self.master_data = defaultdict(dict)
        # Columns every input BOM sheet must provide.
        self.required_columns = ['Partnumber', 'Purchase_Code', 'MF_PN', 'Description',
                                 'Part_Type', 'MF_NAME', 'PCB_Footprint', 'Quantity', 'Reference']
        # filename -> {material key: quantity} for the per-file columns.
        self.file_quantities = {}
        # Final DataFrame built by generate_report.
        self.consolidated_report = None
        # Running counters used for the final summary printout.
        self.inconsistency_count = 0
        self.processed_files = 0
        self.processed_rows = 0
        # Folder where the report is written (set by the caller).
        self.output_folder = ""
|
||||
|
||||
def find_valid_sheet(self, file_path):
    """Scan every sheet for a header row containing the key BOM columns.

    Returns (sheet_name, header_row_index) or (None, None).
    NOTE(review): each sheet is read in full just to locate the header;
    consider an nrows-limited read if the inputs get large.
    """
    xl = pd.ExcelFile(file_path)
    for sheet_name in xl.sheet_names:
        df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
        for i in range(len(df)):
            headers = df.iloc[i].values
            # A row counts as the header when all key columns appear in it.
            if all(col in headers for col in ['Item', 'Partnumber', 'Purchase_Code', 'MF_PN']):
                return sheet_name, i
    return None, None
|
||||
|
||||
def clean_column_names(self, df):
|
||||
"""清洗列名并标准化"""
|
||||
df.columns = df.columns.str.strip().str.replace(r'\s+', '_', regex=True)
|
||||
df.columns = df.columns.str.replace(r'[^a-zA-Z0-9_]', '', regex=True)
|
||||
return df
|
||||
|
||||
def process_file(self, file_path):
    """Process one BOM workbook: locate its BOM sheet, validate the
    required columns, then merge every row into master_data.

    Returns True on success, False when the file has no usable BOM table
    or is missing required columns.
    """
    filename = os.path.basename(file_path)
    # Fix: `filename` was computed but the log messages did not include
    # it, making multi-file runs impossible to follow. Print it.
    print(f"处理文件: {filename}...")

    sheet_name, header_row = self.find_valid_sheet(file_path)
    if not sheet_name:
        print(f" ! 未找到有效BOM表: {filename}")
        return False

    df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
    df = self.clean_column_names(df)

    # Every required field must survive header cleaning.
    missing_cols = [col for col in self.required_columns if col not in df.columns]
    if missing_cols:
        print(f" ! 缺少必要列: {', '.join(missing_cols)}")
        return False

    print(f" √ 找到有效Sheet: {sheet_name} (共{len(df)}行)")
    self.file_quantities[filename] = {}
    self.processed_files += 1

    # Merge each row into the consolidated dataset.
    for _, row in df.iterrows():
        self.process_row(row, filename)
        self.processed_rows += 1

    return True
|
||||
|
||||
def process_row(self, row, filename):
    """Merge one BOM row into master_data and record its quantity.

    The merge key is Partnumber, falling back to MF_PN; rows with
    neither are skipped.
    """
    key = row['Partnumber'] if pd.notna(row['Partnumber']) and row['Partnumber'] != '' else row['MF_PN']
    if pd.isna(key) or key == '':
        return

    # First sighting of this material: seed the merged record.
    if key not in self.master_data:
        self.master_data[key] = {
            'Partnumber': row['Partnumber'],
            'Purchase_Code': row['Purchase_Code'],
            'MF_PN': row['MF_PN'],
            'Description': row.get('Description', ''),
            'Part_Type': row.get('Part_Type', ''),
            'MF_NAME': row.get('MF_NAME', ''),
            'PCB_Footprint': row.get('PCB_Footprint', ''),
            'quantity_data': {},  # per-file quantities
            'inconsistencies': []  # human-readable issue strings
        }

    # Cross-file consistency check on the descriptive attributes.
    current_data = self.master_data[key]
    fields_to_check = ['Purchase_Code', 'MF_PN', 'Part_Type', 'MF_NAME', 'PCB_Footprint']

    for field in fields_to_check:
        # NOTE(review): headers were normalized to 'Part_Type' by
        # clean_column_names, so looking up 'Part Type' here likely never
        # matches and this field silently reads '' — confirm the intent.
        db_field = 'Part Type' if field == 'Part_Type' else field

        current_val = str(current_data[field])
        new_val = str(row.get(db_field, ''))

        # Blank/NaN values carry no information; skip the comparison.
        if new_val in ['', 'nan', 'NaN', 'NaT']:
            continue

        if current_val != new_val:
            current_data['inconsistencies'].append(
                f"{field}不一致: {current_val} ≠ {new_val} (文件: (unknown))"
            )

    # Reference designator count should equal the stated Quantity.
    ref_count = 0
    if pd.notna(row['Reference']) and row['Reference'] != '':
        ref_list = str(row['Reference']).split(',')
        ref_count = len([ref for ref in ref_list if ref.strip() != ''])

    try:
        quantity = int(row['Quantity'])
        if ref_count != quantity:
            current_data['inconsistencies'].append(
                f"Reference数量不符: {ref_count}个位置 ≠ Quantity={quantity} (文件: (unknown))"
            )
    except (ValueError, TypeError):
        # Non-numeric Quantity: the count check cannot run; skip quietly.
        pass

    # Record this file's quantity (0 when unparseable).
    try:
        qty_val = int(row['Quantity'])
        self.file_quantities[filename][key] = qty_val
        current_data['quantity_data'][filename] = qty_val
    except (ValueError, TypeError):
        self.file_quantities[filename][key] = 0
        current_data['quantity_data'][filename] = 0

    # NOTE(review): this increments once per *row* whenever the material
    # has any accumulated issues, so materials appearing in many rows get
    # counted repeatedly — confirm whether a per-material count was meant.
    if current_data['inconsistencies']:
        self.inconsistency_count += 1
|
||||
|
||||
def generate_report(self):
    """Write the consolidated BOM to Excel; return a summary-stats dict,
    or None when no data was collected."""
    if not self.master_data:
        print("无有效数据可生成报告")
        return None

    print(f"\n生成合并报告,共{len(self.master_data)}种物料...")

    # One output row per material; one quantity column per input file.
    report_data = []
    file_columns = sorted(self.file_quantities.keys())

    for key, data in self.master_data.items():
        row = {
            'Partnumber': data['Partnumber'],
            'Purchase_Code': data['Purchase_Code'],
            'MF_PN': data['MF_PN'],
            'Description': data['Description'],
            'Part Type': data['Part_Type'],
            'MF_NAME': data['MF_NAME'],
            'PCB_Footprint': data['PCB_Footprint'],
            '检查信息': '; '.join(data['inconsistencies'])
        }

        # Per-file quantities plus a grand total.
        total = 0
        for file in file_columns:
            qty = data['quantity_data'].get(file, 0)
            row[file] = qty
            total += qty
        row['合计'] = total

        report_data.append(row)

    self.consolidated_report = pd.DataFrame(report_data)

    # Timestamped filename avoids clobbering earlier reports.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(self.output_folder, f"BOM合并报告_{timestamp}.xlsx")

    self.consolidated_report.to_excel(output_path, index=False)

    # Summary statistics for the caller's final printout.
    stats = {
        'output_path': output_path,
        'file_count': self.processed_files,
        'material_count': len(self.master_data),
        'inconsistency_count': self.inconsistency_count,
        'processed_rows': self.processed_rows
    }

    return stats
|
||||
|
||||
|
||||
def select_folder():
    """Show a folder-picker dialog; return the chosen path ('' if cancelled)."""
    root = tk.Tk()
    root.withdraw()  # hide the empty root window behind the dialog
    folder_selected = filedialog.askdirectory(title='选择BOM文件所在文件夹')
    return folder_selected
|
||||
|
||||
|
||||
def main():
    """Pick a folder, consolidate every .xlsx BOM in it, print a summary."""
    bom_processor = BOMConsolidator()

    folder_path = select_folder()
    if not folder_path:
        print("未选择文件夹,程序退出")
        return

    # The report is written next to the inputs.
    bom_processor.output_folder = folder_path

    bom_files = glob.glob(os.path.join(folder_path, "*.xlsx"))
    if not bom_files:
        print("文件夹中没有Excel文件")
        return

    print(f"找到 {len(bom_files)} 个Excel文件,开始处理...")

    # Files that fail validation are skipped but counted as scanned.
    processed_count = 0
    for file_path in bom_files:
        success = bom_processor.process_file(file_path)
        if success:
            processed_count += 1

    if bom_processor.master_data:
        stats = bom_processor.generate_report()

        print("\n" + "=" * 40)
        print("BOM合并完成! 汇总信息:")
        print(f"处理文件夹: {folder_path}")
        print(f"扫描文件数: {len(bom_files)}")
        print(f"成功处理文件数: {processed_count}")
        print(f"处理行数: {stats['processed_rows']}")
        print(f"合并物料种类数: {stats['material_count']}")
        print(f"检测到不一致条目数: {stats['inconsistency_count']}")
        print(f"报告已保存至: {stats['output_path']}")
        print("=" * 40)
    else:
        print("没有有效数据生成报告")
|
||||
|
||||
|
||||
# Script entry point.
if __name__ == "__main__":
    main()
|
||||
9
tempReportProcess/.gitignore
vendored
Normal file
9
tempReportProcess/.gitignore
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
/build/*
|
||||
/build
|
||||
/dist/*
|
||||
/dist
|
||||
/source/*
|
||||
/source
|
||||
|
||||
|
||||
tempReportProcess_V2.py
|
||||
248
tempReportProcess/tempReportProcess_V1.py
Normal file
248
tempReportProcess/tempReportProcess_V1.py
Normal file
@@ -0,0 +1,248 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
from datetime import datetime
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
import os
|
||||
import matplotlib.dates as mdates
|
||||
from jinja2 import Template
|
||||
from matplotlib import font_manager, rcParams
|
||||
|
||||
|
||||
class TemperatureDataAnalyzer:
    """Load a temperature-logger CSV, plot the readings, and emit an HTML report.

    Expected CSV layout (no header): timestamp, temperature, status — e.g.
    "10/29/2025 2:20:41 PM, 23.5, 1". The report (statistics tables plus an
    embedded scatter-plot PNG) is written next to the selected CSV file.
    """

    def __init__(self):
        self.data = None          # raw DataFrame loaded from the CSV
        self.file_path = None     # path of the user-selected CSV file
        self.timestamps = []      # parsed datetimes (pandas Series after load)
        self.temperatures = []    # temperature readings (pandas Series after load)
        self.statuses = []        # status codes (pandas Series after load)
        self._configure_chinese_font()  # pick a CJK font so Chinese labels render

    def _configure_chinese_font(self):
        """Configure Matplotlib with a Chinese-capable font.

        Tries common CJK fonts across platforms and sets axes.unicode_minus
        to False so the minus sign still renders with a non-default font.
        Prints a warning if no suitable font is installed.
        """
        try:
            # Common CJK font candidates, cross-platform.
            candidates = [
                "Microsoft YaHei", "Microsoft YaHei UI",  # Windows
                "SimHei", "SimSun",                       # Windows (Hei/Song)
                "PingFang SC", "Heiti SC",                # macOS
                "Noto Sans CJK SC", "Source Han Sans SC", "WenQuanYi Micro Hei",  # Linux
                "Arial Unicode MS"                        # broad Unicode coverage
            ]
            available = {f.name for f in font_manager.fontManager.ttflist}
            for name in candidates:
                if name in available:
                    rcParams["font.sans-serif"] = [name]
                    rcParams["axes.unicode_minus"] = False
                    return
            # No known CJK font installed: warn, but still fix the minus glyph.
            rcParams["axes.unicode_minus"] = False
            print("未检测到常见中文字体,图中中文可能无法正常显示。建议安装 'Noto Sans CJK SC' 或 'Microsoft YaHei'。")
        except Exception as e:
            print(f"中文字体配置失败: {e}")

    def select_file(self):
        """Ask the user to pick the CSV file.

        Returns:
            bool: True if a file was chosen, False if the dialog was cancelled.
        """
        root = tk.Tk()
        root.withdraw()  # hide the blank main window; only the dialog is shown

        file_types = [("CSV files", "*.csv"), ("All files", "*.*")]
        self.file_path = filedialog.askopenfilename(title="选择温度数据CSV文件", filetypes=file_types)
        root.destroy()  # FIX: release the hidden Tk root (was leaked before)

        if not self.file_path:
            print("未选择文件,程序退出")
            return False
        return True

    def load_and_process_data(self):
        """Read the CSV (no header) and parse timestamp/temperature/status.

        Returns:
            bool: True on success, False if reading or parsing fails.
        """
        try:
            self.data = pd.read_csv(self.file_path, header=None)

            # Keep only the first three columns, so CSVs with trailing
            # separators/extra empty columns still load instead of failing
            # the rename below.
            self.data = self.data.iloc[:, :3]
            self.data.columns = ['timestamp', 'temperature', 'status']

            # Timestamps look like "10/29/2025 2:20:41 PM".
            self.data['datetime'] = pd.to_datetime(self.data['timestamp'], format='%m/%d/%Y %I:%M:%S %p')

            # Cache the processed columns for plotting/statistics.
            self.timestamps = self.data['datetime']
            self.temperatures = self.data['temperature']
            self.statuses = self.data['status']

            print(f"成功加载 {len(self.data)} 条记录")
            return True

        except Exception as e:
            print(f"数据处理错误: {e}")
            return False

    def create_scatter_plots(self):
        """Draw temperature-vs-time and status-vs-time scatter plots.

        Returns:
            matplotlib.figure.Figure: the two-panel figure (caller owns it).
        """
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

        # Temperature panel, colour-mapped by the reading itself.
        sc1 = ax1.scatter(self.timestamps, self.temperatures, c=self.temperatures,
                          cmap='coolwarm', alpha=0.7, s=20)
        ax1.set_title('温度随时间变化趋势')
        ax1.set_ylabel('温度 (°C)')
        ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        ax1.grid(True, linestyle='--', alpha=0.7)
        ax1.tick_params(axis='x', rotation=45)
        plt.colorbar(sc1, ax=ax1, label="温度(°C)")

        # Status panel.
        sc2 = ax2.scatter(self.timestamps, self.statuses, c=self.statuses,
                          cmap='viridis', alpha=0.7, s=20)
        ax2.set_title('状态随时间变化')
        ax2.set_xlabel('时间')
        ax2.set_ylabel('状态值')
        ax2.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        ax2.grid(True, linestyle='--', alpha=0.7)
        ax2.tick_params(axis='x', rotation=45)
        plt.colorbar(sc2, ax=ax2, label="状态值")

        plt.tight_layout()
        return fig

    def generate_statistics_report(self):
        """Compute summary statistics over the loaded data.

        Returns:
            dict: record counts, temperature aggregates (rounded to 2 dp),
            start/end timestamps, duration in hours, and a
            'status_distribution' mapping of status value -> occurrence count.
        """
        stats = {
            'total_records': len(self.temperatures),
            'avg_temperature': round(self.temperatures.mean(), 2),
            'max_temperature': round(self.temperatures.max(), 2),
            'min_temperature': round(self.temperatures.min(), 2),
            'std_deviation': round(self.temperatures.std(), 2),
            'temp_range': round(self.temperatures.max() - self.temperatures.min(), 2),
            'start_time': self.timestamps.iloc[0].strftime('%Y-%m-%d %H:%M:%S'),
            'end_time': self.timestamps.iloc[-1].strftime('%Y-%m-%d %H:%M:%S'),
            'duration_hours': round((self.timestamps.iloc[-1] - self.timestamps.iloc[0]).total_seconds() / 3600, 2)
        }

        # Frequency of each status value.
        status_counts = self.statuses.value_counts().to_dict()
        stats['status_distribution'] = status_counts

        return stats

    def save_fig_to_html(self, fig, output_path):
        """Render the figure plus statistics into a standalone HTML report.

        The figure is embedded as a base64-encoded PNG so the report is a
        single self-contained file.
        """
        import io
        import base64

        # Serialize the figure to PNG bytes, then base64 for embedding.
        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
        buf.seek(0)
        img_str = base64.b64encode(buf.read()).decode('utf-8')
        buf.close()

        html_template = """
        <!DOCTYPE html>
        <html lang="zh-CN">
        <head>
            <meta charset="UTF-8">
            <title>温度数据分析报告</title>
            <style>
                body { font-family: Arial, sans-serif; margin: 20px; }
                .header { background-color: #f0f0f0; padding: 15px; border-radius: 5px; }
                .section { margin-bottom: 30px; }
                .stats-table { width: 100%; border-collapse: collapse; }
                .stats-table th, .stats-table td { border: 1px solid #ddd; padding: 8px; text-align: left; }
                .stats-table th { background-color: #f2f2f2; }
                .image-container { text-align: center; margin: 20px 0; }
                h1, h2 { color: #333; }
            </style>
        </head>
        <body>
            <div class="header">
                <h1>温度数据分析报告</h1>
                <p><strong>数据文件:</strong> {{ file_name }}</p>
                <p><strong>生成时间:</strong> {{ generation_time }}</p>
            </div>

            <div class="section">
                <h2>数据概览</h2>
                <table class="stats-table">
                    <tr><th>项目</th><th>数值</th></tr>
                    {% for key, value in statistics.items() %}
                    {% if key != 'status_distribution' %}
                    <tr><td>{{ key.replace('_', ' ').title() }}</td><td>{{ value }}</td></tr>
                    {% endif %}
                    {% endfor %}
                </table>
            </div>

            <div class="section">
                <h2>状态分布</h2>
                <table class="stats-table">
                    <tr><th>状态值</th><th>出现次数</th></tr>
                    {% for status, count in statistics.status_distribution.items() %}
                    <tr><td>{{ status }}</td><td>{{ count }}</td></tr>
                    {% endfor %}
                </table>
            </div>

            <div class="section">
                <h2>温度与状态时序图</h2>
                <div class="image-container">
                    <img src="data:image/png;base64,{{ image_data }}" alt="温度与状态时序图">
                </div>
            </div>
        </body>
        </html>
        """

        template = Template(html_template)
        rendered_html = template.render(
            file_name=self.file_path,
            generation_time=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            statistics=self.generate_statistics_report(),
            image_data=img_str
        )

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(rendered_html)

    def run_analysis(self):
        """Full pipeline: pick file -> load -> plot -> write HTML -> print summary."""
        if not self.select_file():
            return

        if not self.load_and_process_data():
            return

        fig = self.create_scatter_plots()

        # The report goes next to the source CSV, with a timestamped name.
        base_filename = os.path.splitext(os.path.basename(self.file_path))[0]
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"{base_filename}_{timestamp}.html"
        output_dir = os.path.dirname(self.file_path)
        output_path = os.path.join(output_dir, output_filename)

        self.save_fig_to_html(fig, output_path)
        plt.close(fig)  # FIX: release the figure so repeated runs do not accumulate memory

        print(f"分析完成!报告已保存至: {output_path}")

        # Console summary (status distribution omitted for brevity).
        stats = self.generate_statistics_report()
        print("\n=== 数据统计摘要 ===")
        for key, value in stats.items():
            if key != 'status_distribution':
                print(f"{key.replace('_', ' ').title()}: {value}")
||||
# Run the interactive analysis only when executed as a script (not on import).
if __name__ == "__main__":
    analyzer = TemperatureDataAnalyzer()
    analyzer.run_analysis()
|
||||
Reference in New Issue
Block a user