"""Compare two PCBA BOM Excel files and write a change-record workbook.

The tool asks the user for an "old" and a "new" BOM, locates the sheet and
header row in each, validates the data, compares the two BOMs keyed by part
number, and saves the differences (plus any validation problems) to a new
``.xlsx`` file next to the new BOM.
"""

import os
import re
import warnings
from datetime import datetime

import numpy as np
import pandas as pd

# The GUI and the workbook writer are only needed by
# BOMComparator.generate_change_record(). Guarding the imports keeps the
# comparison/validation logic importable on headless installs; the names stay
# defined at module level either way.
try:
    import tkinter as tk
    from tkinter import filedialog
except ImportError:
    tk = None
    filedialog = None

try:
    from openpyxl import Workbook
    from openpyxl.utils.dataframe import dataframe_to_rows
except ImportError:
    Workbook = None
    dataframe_to_rows = None

warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')


class BOMComparator:
    """Load, validate and compare two BOM spreadsheets.

    Attributes:
        column_mapping: change-record column name -> standardized BOM column.
        ignore_columns: standardized BOM columns excluded from change detection.
        required_columns: standardized columns every BOM must provide.
        change_columns: change-record columns written for each BOM side.
        mandatory_keywords: normalized keywords a header row must contain.
        validation_errors: error dicts collected by ``load_bom``.
        stats: counters reported by ``generate_summary``.
    """

    # Header spellings accepted in BOM files -> standardized column name.
    # Lookup ignores case, whitespace, underscores and hyphens.
    COLUMN_ALIASES = {
        'Item': 'Item',
        'Partnumber': 'Partnumber',
        'Part Number': 'Partnumber',
        'Purchase_Code': 'Purchase_Code',
        'MF_PN': 'MF_PN',
        'Description': 'Description',
        'Part Type': 'Part Type',
        'MF_NAME': 'MF_NAME',
        'Manufacturer': 'MF_NAME',
        'PCB_Footprint': 'PCB_Footprint',
        'Reference': 'Reference',
        'References': 'Reference',
        'Quantity': 'Quantity',
        'Qty': 'Quantity',
        '加工方式': '焊接方式',
        '焊接方式': '焊接方式',
        'Value': 'Value',
        '备注': '备注',
        'Remark': '备注',
        'Comments': '备注',
    }

    def __init__(self):
        self.column_mapping = {
            'ITEM': 'Partnumber',
            'HT PN': 'Partnumber',
            'MF PN': 'MF_PN',
            'MFG': 'MF_NAME',
            'CRD': 'Reference',
            'Description': 'Description',
            'Qty': 'Quantity',
            '焊接方式': '焊接方式',
            'Remark': '备注',
        }
        self.ignore_columns = ['备注']
        # Two mapping entries point at 'Partnumber'; de-duplicate (order kept)
        # so a "missing columns" message never lists the same column twice.
        self.required_columns = list(dict.fromkeys(self.column_mapping.values()))
        self.change_columns = [
            'ITEM', 'HT PN', 'MF PN', 'MFG', 'CRD', 'Description', 'Qty', 'Remark'
        ]
        self.mandatory_keywords = ['item', 'partnumber', 'mfpn']
        # Validation problems collected while loading BOM files.
        self.validation_errors = []
        self.stats = self._empty_stats()

    @staticmethod
    def _empty_stats():
        """Return a fresh, zeroed statistics dictionary."""
        return {
            'old_bom_rows': 0,
            'new_bom_rows': 0,
            'changed_items': 0,
            'added_items': 0,
            'removed_items': 0,
            'total_errors': 0,
        }

    def normalize_text(self, text):
        """Return ``text`` lower-cased with everything except ASCII letters,
        digits and whitespace removed; missing values become ``""``."""
        if pd.isna(text):
            return ""
        text = str(text)
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
        return text.strip().lower()

    def find_header_row(self, df):
        """Return the index of the first row (within the first 20) whose cells
        contain every keyword in ``self.mandatory_keywords``.

        Whitespace inside a cell is ignored while matching, so "Part Number"
        and "MF PN" satisfy the ``partnumber`` / ``mfpn`` keywords.

        Raises:
            ValueError: if no such row exists in the scanned range.
        """
        scan_rows = min(20, len(df))
        print(f"扫描前 {scan_rows} 行寻找标题行...")

        for i in range(scan_rows):
            cells = [
                re.sub(r'\s+', '', self.normalize_text(cell))
                for cell in df.iloc[i].values
            ]
            if all(any(keyword in cell for cell in cells)
                   for keyword in self.mandatory_keywords):
                print(f"✅ 找到有效标题行 (索引 {i}),包含所有必需关键词")
                return i

        error_msg = (
            "❌ 未找到有效的标题行:所有标题行必须同时包含以下关键词:\n"
            "- Item (或类似表述)\n"
            "- Partnumber (或类似表述)\n"
            "- MF_PN (或类似表述)\n\n"
            "在文件的前20行中没有找到同时包含所有关键词的行。"
        )
        raise ValueError(error_msg)

    def _sheet_has_header(self, xls, sheet_name):
        """Return True if ``sheet_name`` of the open workbook ``xls`` contains
        a valid header row; report the reason and return False otherwise."""
        try:
            df_preview = pd.read_excel(xls, sheet_name=sheet_name, header=None, nrows=20)
            self.find_header_row(df_preview)
        except Exception as e:
            # Best effort: an unreadable sheet or a sheet without a header
            # simply disqualifies that sheet.
            print(f" ❌ Sheet '{sheet_name}': {str(e)}")
            return False
        return True

    def find_active_sheet(self, file_path):
        """Return the name of the sheet that holds the BOM, or ``None``.

        Sheets whose name contains "bom" or "pcba" (case-insensitive) are
        checked first, then the remaining sheets in workbook order. Each sheet
        is read once, and the workbook handle is closed before returning.
        """
        print(f"扫描文件: {os.path.basename(file_path)}")

        with pd.ExcelFile(file_path) as xls:
            sheet_names = list(xls.sheet_names)

            candidate_sheets = []
            for sheet_name in sheet_names:
                if any(keyword in str(sheet_name).lower() for keyword in ("bom", "pcba")):
                    candidate_sheets.append(sheet_name)
                    print(f" 发现候选Sheet: {sheet_name} - 关键词匹配")

            # Step 1: sheets whose name suggests a BOM.
            for sheet_name in candidate_sheets:
                print(f" 优先检查候选Sheet: {sheet_name}")
                if self._sheet_has_header(xls, sheet_name):
                    print(f"✅ 在候选Sheet '{sheet_name}' 中找到标题行")
                    return sheet_name

            # Step 2: every sheet that has not been checked yet.
            if candidate_sheets:
                print(" 候选Sheet均未找到标题行,将检查其余Sheet")
            else:
                print(" 未找到名称包含'BOM'的Sheet,将检查所有Sheet")
            for sheet_name in sheet_names:
                if sheet_name in candidate_sheets:
                    continue
                print(f" 检查Sheet: {sheet_name}")
                if self._sheet_has_header(xls, sheet_name):
                    print(f"✅ 在Sheet '{sheet_name}' 中找到标题行")
                    return sheet_name

        print("❌ 所有Sheet均未找到有效标题行")
        return None

    @staticmethod
    def _parse_quantity(qty):
        """Return ``qty`` as an int, or -1 when it is not a usable number.

        Whole numbers stored as text with a decimal part (e.g. "2.0") are
        accepted.
        """
        try:
            return int(qty)
        except (ValueError, TypeError):
            pass
        try:
            as_float = float(qty)
        except (ValueError, TypeError):
            return -1
        return int(as_float) if as_float.is_integer() else -1

    def validate_bom(self, bom_df, file_name, sheet_name, header_row_idx=0):
        """Validate BOM data and return a list of error dicts.

        Checks performed:
          1. duplicated (non-empty) ``Partnumber`` values,
          2. empty ``Partnumber`` values,
          3. number of comma-separated ``Reference`` designators vs ``Quantity``.

        Args:
            bom_df: BOM with standardized column names; the index is expected
                to be the 0-based data-row position as read from the sheet.
            file_name: file name recorded in each error.
            sheet_name: sheet name recorded in each error.
            header_row_idx: 0-based index of the header row in the sheet, used
                to convert a data-row index into a spreadsheet row number.

        Returns:
            list of dicts with keys 文件, Sheet, 原始行号, 异常类型, 异常描述.
        """
        errors = []
        # Spreadsheet rows are 1-based and the data starts on the row after
        # the header: row = idx + header_row_idx + 2.
        row_offset = header_row_idx + 2

        def add_error(idx, error_type, description):
            errors.append({
                '文件': file_name,
                'Sheet': sheet_name,
                '原始行号': idx + row_offset,
                '异常类型': error_type,
                '异常描述': description,
            })

        part_numbers = bom_df['Partnumber']
        is_empty = part_numbers.isna() | (part_numbers.astype(str).str.strip() == '')

        # 1. Duplicated part numbers. Empty values are excluded here because
        #    pandas treats NaN values as duplicates of each other and they are
        #    already reported by check 2.
        dup_partnumbers = bom_df[bom_df.duplicated('Partnumber', keep=False) & ~is_empty]
        if not dup_partnumbers.empty:
            print(f"⚠️ 发现重复的Partnumber: {len(dup_partnumbers)} 行")
            for idx, row in dup_partnumbers.iterrows():
                add_error(idx, '重复Partnumber',
                          f"Partnumber '{row['Partnumber']}' 重复出现")

        # 2. Empty part numbers.
        empty_partnumbers = bom_df[is_empty]
        if not empty_partnumbers.empty:
            print(f"⚠️ 发现空Partnumber: {len(empty_partnumbers)} 行")
            for idx in empty_partnumbers.index:
                add_error(idx, '空Partnumber', "Partnumber为空")

        # 3. Reference designator count must equal Quantity.
        for idx, row in bom_df.iterrows():
            refs = row['Reference']
            qty = row['Quantity']
            # A missing Reference means zero designators (same convention as
            # compare_reference_lists), not the literal text "nan".
            refs = "" if pd.isna(refs) else str(refs)
            ref_count = len([r for r in refs.split(',') if r.strip()])
            qty_val = self._parse_quantity(qty)
            if ref_count != qty_val:
                add_error(idx, '数量不一致',
                          f"位号数量({ref_count}) ≠ Quantity({qty})")

        return errors

    @staticmethod
    def _alias_key(name):
        """Return ``name`` reduced to a case/spacing-insensitive lookup key."""
        return re.sub(r'[\s_\-]+', '', str(name)).lower()

    def _standardize_columns(self, columns):
        """Return a ``{original: standardized}`` rename map for ``columns``.

        A column is renamed only when its standardized name is not already
        provided by another column, so the result never has duplicate names.
        """
        lookup = {self._alias_key(alias): target
                  for alias, target in self.COLUMN_ALIASES.items()}
        canonical = set(self.COLUMN_ALIASES.values())
        taken = {col for col in columns if col in canonical}
        rename_map = {}
        for col in columns:
            target = lookup.get(self._alias_key(col))
            if target is None or target == col or target in taken:
                continue
            rename_map[col] = target
            taken.add(target)
        return rename_map

    def load_bom(self, file_path):
        """Load one BOM file, standardize its columns and validate it.

        Validation errors are appended to ``self.validation_errors`` and
        counted in ``self.stats['total_errors']``. The raw row count is stored
        in ``self.stats['old_bom_rows']`` while that counter is still 0, and
        in ``self.stats['new_bom_rows']`` otherwise.

        Returns:
            tuple ``(bom_df, sheet_name)``.

        Raises:
            ValueError: if no sheet with a valid header row exists or a
                required column is missing.
        """
        print(f"识别激活Sheet...")
        active_sheet = self.find_active_sheet(file_path)
        if active_sheet is None:
            # Passing sheet_name=None to read_excel would return a dict of all
            # sheets and fail later with an unrelated error.
            raise ValueError(
                f"❌ 在 '{os.path.basename(file_path)}' 中未找到包含有效标题行的Sheet"
            )
        print(f"📊 使用Sheet: {active_sheet}")

        with pd.ExcelFile(file_path) as xls:
            df_preview = pd.read_excel(xls, sheet_name=active_sheet, header=None, nrows=20)
            header_row_idx = self.find_header_row(df_preview)

            print("加载完整BOM数据...")
            bom_df = pd.read_excel(
                xls,
                sheet_name=active_sheet,
                header=header_row_idx,
                dtype=str,
            )

        if "old_bom_rows" not in self.stats or self.stats['old_bom_rows'] == 0:
            self.stats['old_bom_rows'] = len(bom_df)
        else:
            self.stats['new_bom_rows'] = len(bom_df)

        # Clean up column names.
        bom_df.columns = [str(col).strip() for col in bom_df.columns]
        print(f" 原始列名: {list(bom_df.columns)}")

        # Map accepted header spellings onto the standardized names.
        bom_df = bom_df.rename(columns=self._standardize_columns(list(bom_df.columns)))
        print(f" 标准化后列名: {list(bom_df.columns)}")

        # 'Item' is needed for row cleanup and for the change record, so it is
        # checked together with the mapped columns.
        required = list(dict.fromkeys(['Item', *self.required_columns]))
        missing_cols = [col for col in required if col not in bom_df.columns]
        if missing_cols:
            raise ValueError(f"❌ 缺少必需列: {', '.join(missing_cols)}")

        # Drop rows that carry no item number (blank or filler rows). The
        # index is intentionally not reset so it can be mapped back to the
        # spreadsheet row during validation.
        initial_count = len(bom_df)
        bom_df = bom_df.replace('', np.nan)
        bom_df = bom_df.dropna(subset=['Item'], how='all')
        cleaned_count = len(bom_df)
        if initial_count > cleaned_count:
            print(
                f" 清理空行: 移除 {initial_count - cleaned_count} 行 (原 {initial_count} 行 -> 现 {cleaned_count} 行)")

        # Run data validation.
        file_name = os.path.basename(file_path)
        errors = self.validate_bom(bom_df, file_name, active_sheet, header_row_idx)
        self.validation_errors.extend(errors)
        self.stats['total_errors'] += len(errors)

        if errors:
            print(f"⚠️ 在 '{file_name}' 中发现 {len(errors)} 个数据异常")

        return bom_df, active_sheet

    def compare_reference_lists(self, old_refs_str, new_refs_str):
        """Compare two comma-separated Reference lists.

        Order and surrounding whitespace are ignored; missing values count as
        an empty list.

        Returns:
            ``""`` when both lists hold the same designators, otherwise a
            description of the added and/or removed designators.
        """
        def to_set(refs):
            if pd.isna(refs):
                return set()
            return {ref.strip() for ref in str(refs).split(',') if ref.strip()}

        old_refs = to_set(old_refs_str)
        new_refs = to_set(new_refs_str)
        if old_refs == new_refs:
            return ""

        added_refs = new_refs - old_refs
        removed_refs = old_refs - new_refs

        diff_msgs = []
        if added_refs:
            diff_msgs.append(f"增加位号: {','.join(sorted(added_refs))}")
        if removed_refs:
            diff_msgs.append(f"删除位号: {','.join(sorted(removed_refs))}")
        return "; ".join(diff_msgs)

    @staticmethod
    def _cell(row, column):
        """Return ``row[column]``, or ``''`` when the row, the column or the
        value is missing."""
        if row is None or column not in row.index:
            return ''
        value = row[column]
        return '' if pd.isna(value) else value

    @staticmethod
    def _first_row_by_partnumber(bom_df):
        """Return ``{part number: first row with that part number}``."""
        first_rows = {}
        for _, row in bom_df.iterrows():
            first_rows.setdefault(row['Partnumber'], row)
        return first_rows

    def _describe_changes(self, old_row, new_row):
        """Return the list of textual differences between two rows that
        describe the same part number (empty list when nothing changed)."""
        def in_both(column):
            return column in old_row.index and column in new_row.index

        details = []

        if in_both('Quantity'):
            old_qty = str(self._cell(old_row, 'Quantity'))
            new_qty = str(self._cell(new_row, 'Quantity'))
            if old_qty != new_qty:
                details.append(f"Qty: {old_qty}→{new_qty}")

        if in_both('MF_PN'):
            old_mfpn = str(self._cell(old_row, 'MF_PN'))
            new_mfpn = str(self._cell(new_row, 'MF_PN'))
            if old_mfpn != new_mfpn:
                details.append(f"MF PN: {old_mfpn}→{new_mfpn}")

        # References are compared as sets so that re-ordering is not a change.
        if in_both('Reference'):
            ref_diff = self.compare_reference_lists(old_row['Reference'], new_row['Reference'])
            if ref_diff:
                details.append(ref_diff)

        for change_col, bom_col in self.column_mapping.items():
            if (change_col == 'ITEM'
                    or bom_col in ('Quantity', 'MF_PN', 'Reference')
                    or bom_col in self.ignore_columns):
                continue
            old_val = self._cell(old_row, bom_col)
            new_val = self._cell(new_row, bom_col)
            if str(old_val) != str(new_val):
                details.append(f"{change_col}: {old_val}→{new_val}")

        return details

    def compare_boms(self, old_bom, new_bom):
        """Compare two standardized BOMs keyed by ``Partnumber``.

        The inputs are not modified. When a part number occurs several times
        in one BOM, its first row is used. Unchanged parts are omitted from
        the result. ``self.stats`` counters for added, removed and changed
        items are incremented.

        Returns:
            tuple ``(change_df, right_start_col)`` where ``right_start_col``
            is the 1-based column at which the new-BOM side starts.
        """
        print("开始比较两份BOM...")

        # Work on copies so the caller's frames keep their original values.
        old_bom = old_bom.copy()
        new_bom = new_bom.copy()
        old_bom['Partnumber'] = old_bom['Partnumber'].astype(str).str.strip()
        new_bom['Partnumber'] = new_bom['Partnumber'].astype(str).str.strip()

        # One pass per BOM instead of a full boolean-mask scan per part number.
        old_rows = self._first_row_by_partnumber(old_bom)
        new_rows = self._first_row_by_partnumber(new_bom)

        old_partnumbers = set(old_rows)
        if len(old_partnumbers) != len(old_bom):
            print(f"⚠️ 旧BOM有重复的Partnumber: 总行数{len(old_bom)},唯一物料数{len(old_partnumbers)}")

        new_partnumbers = set(new_rows)
        if len(new_partnumbers) != len(new_bom):
            print(f"⚠️ 新BOM有重复的Partnumber: 总行数{len(new_bom)},唯一物料数{len(new_partnumbers)}")

        all_partnumbers = sorted(old_partnumbers | new_partnumbers)
        total = len(all_partnumbers)
        print(f" 总物料项数量: {total} (旧BOM: {len(old_partnumbers)}, 新BOM: {len(new_partnumbers)})")

        changes = []
        for position, pn in enumerate(all_partnumbers, 1):
            if position % 100 == 0 or position == total:
                print(f" 处理进度: {position}/{total} 项物料")

            old_row = old_rows.get(pn)
            new_row = new_rows.get(pn)

            if old_row is None:
                change_type = "新增"
                change_desc = "新增物料"
                self.stats['added_items'] += 1
            elif new_row is None:
                change_type = "删除"
                change_desc = "删除物料"
                self.stats['removed_items'] += 1
            else:
                change_details = self._describe_changes(old_row, new_row)
                if not change_details:
                    # Present in both BOMs and identical: not a change.
                    continue
                change_type = "变更"
                change_desc = "; ".join(change_details)
                self.stats['changed_items'] += 1

            record = {
                'ITEM_OLD': self._cell(old_row, 'Item'),
                'ITEM_NEW': self._cell(new_row, 'Item'),
            }
            for change_col, bom_col in self.column_mapping.items():
                if change_col == 'ITEM':
                    continue
                # Left side: old BOM values; right side: new BOM values.
                record[change_col] = self._cell(old_row, bom_col)
                record[f'NEW_{change_col}'] = self._cell(new_row, bom_col)

            record['Design change Type'] = change_type
            # The right-hand Remark column carries the change description.
            record['NEW_Remark'] = change_desc
            changes.append(record)

        left_columns = ['ITEM_OLD'] + [col for col in self.change_columns if col != 'ITEM']
        middle_columns = ['Design change Type']
        right_columns = ['ITEM_NEW'] + [f'NEW_{col}' for col in self.change_columns if col != 'ITEM']

        # Keep the change description as the last column.
        if 'NEW_Remark' in right_columns:
            right_columns.remove('NEW_Remark')
            right_columns.append('NEW_Remark')

        change_columns = left_columns + middle_columns + right_columns
        right_start_col = len(left_columns) + len(middle_columns) + 1

        return pd.DataFrame(changes, columns=change_columns), right_start_col

    def generate_summary(self):
        """Return a printable multi-line summary of ``self.stats``."""
        total_changes = (self.stats['changed_items']
                         + self.stats['added_items']
                         + self.stats['removed_items'])
        summary = [
            "\n" + "=" * 50,
            "BOM 比较处理汇总",
            "-" * 50,
            f"原始BOM行数: {self.stats['old_bom_rows']}",
            f"新BOM行数: {self.stats['new_bom_rows']}",
            f"变更物料数量: {self.stats['changed_items']}",
            f"新增物料数量: {self.stats['added_items']}",
            f"删除物料数量: {self.stats['removed_items']}",
            f"变更记录总数: {total_changes}",
            f"数据异常总数: {self.stats['total_errors']}",
            "=" * 50,
        ]
        return "\n".join(summary)

    def _write_workbook(self, change_df, right_start_col,
                        old_file_name, new_file_name, output_path):
        """Write the change record (and validation errors, if any) to
        ``output_path``."""
        wb = Workbook()

        # Change-record sheet.
        ws_change = wb.active
        ws_change.title = "PCBA_BOM_change record"

        if change_df.empty:
            ws_change.cell(row=1, column=1, value="两份BOM完全相同,无变更记录")
            print("✅ 两份BOM完全相同,无变更记录")
        else:
            # Both sides use the same visible column titles.
            column_rename = {
                'ITEM_OLD': 'ITEM',
                'ITEM_NEW': 'ITEM',
                **{f'NEW_{col}': col for col in self.change_columns if col != 'ITEM'},
                'NEW_Remark': 'Remark',
            }
            change_df = change_df.rename(columns=column_rename)

            # Row 1: source file names above their respective sides.
            ws_change.cell(row=1, column=1, value=old_file_name)
            ws_change.cell(row=1, column=right_start_col, value=new_file_name)

            # Row 2: column titles.
            for col_idx, col_name in enumerate(change_df.columns.tolist(), 1):
                ws_change.cell(row=2, column=col_idx, value=col_name)

            # Row 3 onwards: data.
            for r_idx, row in enumerate(dataframe_to_rows(change_df, index=False, header=False), 3):
                for c_idx, value in enumerate(row, 1):
                    ws_change.cell(row=r_idx, column=c_idx, value=value)

        # Validation-error sheet.
        if self.validation_errors:
            print(f"⚠️ 发现 {len(self.validation_errors)} 个数据异常,创建异常记录")
            ws_errors = wb.create_sheet(title="BOM异常记录")

            error_columns = ['文件', 'Sheet', '原始行号', '异常类型', '异常描述']
            for col_idx, col_name in enumerate(error_columns, 1):
                ws_errors.cell(row=1, column=col_idx, value=col_name)

            for row_idx, error in enumerate(self.validation_errors, 2):
                for col_idx, col_name in enumerate(error_columns, 1):
                    ws_errors.cell(row=row_idx, column=col_idx, value=error[col_name])

        wb.save(output_path)

    def generate_change_record(self):
        """Interactive entry point: pick two BOM files, compare them and save
        the change record next to the new BOM.

        Statistics and collected validation errors are reset at the start.
        Errors raised while processing are printed with a traceback instead of
        propagating.

        Raises:
            ImportError: if tkinter or openpyxl is not installed.
        """
        if tk is None or filedialog is None:
            raise ImportError("❌ 缺少 tkinter,无法打开文件选择对话框")
        if Workbook is None or dataframe_to_rows is None:
            raise ImportError("❌ 缺少 openpyxl,无法生成变更记录文件")

        root = tk.Tk()
        root.withdraw()

        # Reset statistics and validation errors.
        self.stats = self._empty_stats()
        self.validation_errors = []

        try:
            # Select the original BOM file.
            print("\n" + "=" * 50)
            print("步骤 1/4: 选择原始BOM文件")
            print("=" * 50)
            old_file = filedialog.askopenfilename(
                title="选择原始BOM文件",
                filetypes=[("Excel Files", "*.xlsx *.xls")]
            )
            if not old_file:
                print("❌ 未选择文件,操作取消")
                return
            print(f"📂 已选择原始BOM: {old_file}")
            old_file_name = os.path.basename(old_file)

            # Select the changed BOM file.
            print("\n" + "=" * 50)
            print("步骤 2/4: 选择变更后BOM文件")
            print("=" * 50)
            new_file = filedialog.askopenfilename(
                title="选择变更后BOM文件",
                filetypes=[("Excel Files", "*.xlsx *.xls")]
            )
            if not new_file:
                print("❌ 未选择文件,操作取消")
                return
            print(f"📂 已选择新BOM: {new_file}")
            new_file_name = os.path.basename(new_file)
            output_dir = os.path.dirname(new_file)

            # Load both BOM files.
            print("\n" + "=" * 50)
            print("步骤 3/4: 加载并处理BOM文件")
            print("=" * 50)
            print(f"🔍 加载原始BOM文件: {old_file_name}")
            old_bom, old_bom_activesheetname = self.load_bom(old_file)
            print(f"✅ 原始BOM加载完成,共 {len(old_bom)} 行")

            print(f"\n🔍 加载变更后BOM文件: {new_file_name}")
            new_bom, new_bom_activesheetname = self.load_bom(new_file)
            print(f"✅ 新BOM加载完成,共 {len(new_bom)} 行")

            # Compare the BOMs.
            print("\n" + "=" * 50)
            print("步骤 4/4: 比较BOM差异并生成变更记录")
            print("=" * 50)
            print("🔍 比较BOM差异...")
            change_df, right_start_col = self.compare_boms(old_bom, new_bom)

            # Build the output file name.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"{old_bom_activesheetname} to {new_bom_activesheetname} eBOM_change_record_{timestamp}.xlsx"
            output_path = os.path.join(output_dir, output_file)

            # Save the change record and the validation errors.
            print(f"\n💾 保存变更记录文件: {output_path}")
            self._write_workbook(change_df, right_start_col,
                                 old_file_name, new_file_name, output_path)

            print(self.generate_summary())
            print(f"\n✅ 变更记录已保存至: {output_path}")

        except Exception as e:
            # Top-level boundary of the interactive tool: report and return.
            print(f"\n❌ 处理过程中出错: {str(e)}")
            import traceback
            traceback.print_exc()
        finally:
            # Release the hidden Tk root window created above.
            root.destroy()


if __name__ == "__main__":
    print("=" * 60)
    print(" PCBA BOM 变更记录生成工具 ")
    print("=" * 60)
    print("要求: 标题行必须同时包含 'Item', 'Partnumber', 'MF_PN'")

    comparator = BOMComparator()
    comparator.generate_change_record()

    print("\n" + "=" * 50)
    print(" 处理完成,按任意键退出... ")
    # NOTE: the pause below is disabled in the original script.
    # input()