"""BOM merge & consistency-check tool.

Scans a folder of Excel BOM files, locates the BOM table inside each
workbook, consolidates identical materials across files, flags field and
quantity inconsistencies, and writes a multi-sheet Excel report into a
"BOM_Merge_out" subfolder.
"""
import pandas as pd
import os
import glob
import re
from datetime import datetime
import tkinter as tk
from tkinter import filedialog
from collections import defaultdict
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass


@dataclass
class ProcessedFileInfo:
    """Bookkeeping for one successfully parsed source file."""
    filename: str      # base name of the Excel file
    sheet_name: str    # sheet that contained the BOM table
    start_row: int     # 0-based row index of the header line
    total_rows: int    # data rows read below the header (before cleaning)
    valid_rows: int    # rows that produced a usable BOMRow


@dataclass
class BOMRow:
    """One material line parsed from a BOM sheet."""
    partnumber: str
    purchase_code: str
    mf_pn: str
    description: str
    part_type: str
    mf_name: str
    pcb_footprint: str
    quantity: int
    reference: str
    filename: str = ""
    sheet_name: str = ""

    @staticmethod
    def _clean(value: Any) -> str:
        """Normalize a cell to a stripped string; NaN/None become ''.

        Without this, str(NaN) produced the literal string 'nan', which is
        truthy and corrupted get_key() for rows with a blank Partnumber.
        """
        if pd.isna(value):
            return ''
        return str(value).strip()

    @classmethod
    def from_dataframe_row(cls, row: pd.Series, filename: str = "",
                           sheet_name: str = "") -> Optional['BOMRow']:
        """Build a BOMRow from a cleaned DataFrame row.

        Returns None when the data cannot be converted (e.g. a non-numeric
        Quantity cell).  A blank/NaN Quantity is treated as 0 instead of
        silently discarding the whole row.
        """
        try:
            qty_raw = row.get('Quantity', 0)
            quantity = 0 if pd.isna(qty_raw) else int(qty_raw)
            return cls(
                partnumber=cls._clean(row.get('Partnumber', '')),
                purchase_code=cls._clean(row.get('Purchase_Code', '')),
                mf_pn=cls._clean(row.get('MF_PN', '')),
                description=cls._clean(row.get('Description', '')),
                part_type=cls._clean(row.get('Part_Type', '')),
                mf_name=cls._clean(row.get('MF_NAME', '')),
                pcb_footprint=cls._clean(row.get('PCB_Footprint', '')),
                quantity=quantity,
                reference=cls._clean(row.get('Reference', '')),
                filename=filename,
                sheet_name=sheet_name
            )
        except (ValueError, TypeError):
            return None

    def get_key(self) -> str:
        """Unique merge key: Partnumber, falling back to MF_PN."""
        return self.partnumber if self.partnumber else self.mf_pn

    def is_valid(self) -> bool:
        """A row is usable only if it has a non-empty merge key."""
        return bool(self.get_key())


@dataclass
class ConsolidatedMaterial:
    """A material merged across several source files."""
    partnumber: str
    purchase_code: str
    mf_pn: str
    description: str
    part_type: str
    mf_name: str
    pcb_footprint: str
    quantity_data: Dict[str, int]   # filename -> accumulated quantity
    inconsistencies: List[str]      # human-readable problem descriptions

    @property
    def total_quantity(self) -> int:
        """Sum of quantities over all source files."""
        return sum(self.quantity_data.values())

    @property
    def has_inconsistencies(self) -> bool:
        """True when at least one consistency problem was recorded."""
        return len(self.inconsistencies) > 0


class ConsistencyChecker:
    """Detects cross-file inconsistencies for merged materials."""

    def __init__(self):
        # Fields compared between an existing material and a new row.
        self.fields_to_check = ['Purchase_Code', 'MF_PN', 'Part_Type',
                                'MF_NAME', 'PCB_Footprint']

    def check_field_consistency(self, existing: ConsolidatedMaterial,
                                new_row: BOMRow) -> List[str]:
        """Compare descriptive fields; return one message per mismatch."""
        inconsistencies = []
        # Display label -> attribute name shared by both data classes.
        # (The original mapping carried an unused third element per entry.)
        field_mapping = {
            'Purchase_Code': 'purchase_code',
            'MF_PN': 'mf_pn',
            'Part_Type': 'part_type',
            'MF_NAME': 'mf_name',
            'PCB_Footprint': 'pcb_footprint',
        }
        for field, attr_name in field_mapping.items():
            existing_val = getattr(existing, attr_name)
            new_val = getattr(new_row, attr_name)
            if self._should_check_field(existing_val, new_val) and existing_val != new_val:
                inconsistencies.append(
                    f"{field}不一致: {existing_val} ≠ {new_val} (文件: {new_row.filename}, Sheet: {new_row.sheet_name})"
                )
        return inconsistencies

    def check_quantity_reference(self, row: BOMRow) -> Optional[str]:
        """Check that the Reference designator count matches Quantity."""
        if not row.reference:
            return None
        ref_count = len([ref for ref in row.reference.split(',') if ref.strip()])
        if ref_count != row.quantity:
            return f"Reference数量不符: {ref_count}个位置 ≠ Quantity={row.quantity} (文件: {row.filename}, Sheet: {row.sheet_name})"
        return None

    def _should_check_field(self, existing_val: str, new_val: str) -> bool:
        """Skip comparisons against empty/placeholder values."""
        if not new_val or new_val.lower() in ['', 'nan', 'none', 'null']:
            return False
        return True


class BOMFileParser:
    """Locates and parses the BOM table inside an Excel workbook."""

    HEADER_SEARCH_ROWS = 10  # only the first N rows are scanned for headers

    def __init__(self):
        self.required_headers = ['Item', 'Partnumber', 'Purchase_Code', 'MF_PN']
        self.required_columns = ['Partnumber', 'Purchase_Code', 'MF_PN',
                                 'Description', 'Part_Type', 'MF_NAME',
                                 'PCB_Footprint', 'Quantity', 'Reference']

    def find_valid_sheet(self, file_path: str) -> Tuple[Optional[str], Optional[int]]:
        """Find the (sheet name, header row index) containing a valid BOM.

        Returns (None, None) when no sheet qualifies or reading fails.
        """
        try:
            xl = pd.ExcelFile(file_path)
            for sheet_name in xl.sheet_names:
                df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
                for i in range(min(len(df), self.HEADER_SEARCH_ROWS)):
                    # Join the cells explicitly: str(ndarray) elides long
                    # arrays with '...', which made wide sheets miss
                    # required header columns.
                    row_text = ' '.join(str(v) for v in df.iloc[i].values)
                    if all(col in row_text for col in self.required_headers):
                        filename = os.path.basename(file_path)
                        # Fixed: the message used a literal "(unknown)"
                        # instead of interpolating the computed filename.
                        print(f"文件{filename}找到有效sheet {sheet_name}|有效数据行从 {i} 开始。")
                        return sheet_name, i
        except Exception as e:
            print(f"读取文件 {file_path} 时出错: {e}")
        return None, None

    def parse_file(self, file_path: str) -> Optional[Tuple[List[BOMRow], ProcessedFileInfo]]:
        """Parse one BOM file into rows plus per-file bookkeeping.

        Returns None when no valid sheet is found, required columns are
        missing, or reading fails.
        """
        filename = os.path.basename(file_path)
        sheet_name, header_row = self.find_valid_sheet(file_path)
        if sheet_name is None:
            return None
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
            total_rows = len(df)
            df = self._clean_dataframe(df)
            if not self._validate_columns(df):
                return None

            bom_rows: List[BOMRow] = []
            for _, row_data in df.iterrows():
                bom_row = BOMRow.from_dataframe_row(row_data, filename, sheet_name)
                if bom_row and bom_row.is_valid():
                    bom_rows.append(bom_row)

            file_info = ProcessedFileInfo(
                filename=filename,
                sheet_name=sheet_name,
                start_row=header_row,
                total_rows=total_rows,
                valid_rows=len(bom_rows)
            )
            return bom_rows, file_info
        except Exception as e:
            print(f"解析文件 {file_path} 时出错: {e}")
            return None

    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalize column names and drop fully empty rows."""
        # astype(str) guards against non-string headers (e.g. unnamed
        # columns read back as numbers).
        df.columns = (pd.Index(df.columns).astype(str)
                      .str.strip()
                      .str.replace(r'\s+', '_', regex=True)
                      .str.replace(r'[^a-zA-Z0-9_]', '', regex=True))
        df = df.dropna(how='all')
        return df

    def _validate_columns(self, df: pd.DataFrame) -> bool:
        """True when every required column survived cleaning."""
        missing_cols = [col for col in self.required_columns if col not in df.columns]
        return len(missing_cols) == 0


class MaterialConsolidator:
    """Merges BOM rows from all files into per-material records."""

    def __init__(self):
        self.materials: Dict[str, ConsolidatedMaterial] = {}
        self.consistency_checker = ConsistencyChecker()
        # filename -> {material key -> quantity}
        self.file_quantities: Dict[str, Dict[str, int]] = defaultdict(dict)
        self.processed_files_info: List[ProcessedFileInfo] = []

    def add_bom_row(self, bom_row: BOMRow) -> None:
        """Merge one BOM row into the consolidated material table."""
        key = bom_row.get_key()
        if key not in self.materials:
            # First sighting: seed the material from this row.
            self.materials[key] = ConsolidatedMaterial(
                partnumber=bom_row.partnumber,
                purchase_code=bom_row.purchase_code,
                mf_pn=bom_row.mf_pn,
                description=bom_row.description,
                part_type=bom_row.part_type,
                mf_name=bom_row.mf_name,
                pcb_footprint=bom_row.pcb_footprint,
                quantity_data={},
                inconsistencies=[]
            )
        material = self.materials[key]

        material.inconsistencies.extend(
            self.consistency_checker.check_field_consistency(material, bom_row))
        ref_inconsistency = self.consistency_checker.check_quantity_reference(bom_row)
        if ref_inconsistency:
            material.inconsistencies.append(ref_inconsistency)

        # Fixed: accumulate instead of assign, so duplicate rows for the
        # same part within one file sum up rather than overwrite.
        new_total = material.quantity_data.get(bom_row.filename, 0) + bom_row.quantity
        material.quantity_data[bom_row.filename] = new_total
        self.file_quantities[bom_row.filename][key] = new_total

    def add_file_info(self, file_info: ProcessedFileInfo) -> None:
        """Record bookkeeping for a successfully parsed file."""
        self.processed_files_info.append(file_info)

    def get_statistics(self) -> Dict[str, Any]:
        """Aggregate counts used by the report and console summary."""
        total_inconsistencies = sum(len(mat.inconsistencies) for mat in self.materials.values())
        materials_with_issues = sum(1 for mat in self.materials.values() if mat.has_inconsistencies)
        return {
            'total_materials': len(self.materials),
            'total_inconsistencies': total_inconsistencies,
            'materials_with_issues': materials_with_issues,
            'file_count': len(self.file_quantities),
            'processed_files_info': self.processed_files_info
        }


class ReportGenerator:
    """Writes the multi-sheet Excel report."""

    def __init__(self, output_folder: str):
        self.output_folder = output_folder
        self._ensure_output_directory()

    def _ensure_output_directory(self) -> None:
        """Create the BOM_Merge_out subfolder if needed."""
        output_dir = os.path.join(self.output_folder, "BOM_Merge_out")
        os.makedirs(output_dir, exist_ok=True)

    def _create_summary_sheet(self, stats: Dict[str, Any]) -> pd.DataFrame:
        """Build the '汇总信息' sheet content."""
        summary_data = [
            ["BOM合并检查汇总报告", ""],
            ["生成时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
            ["", ""],
            ["处理统计", ""],
            ["扫描文件总数", stats['total_files']],
            ["成功处理文件数", stats['processed_files']],
            ["处理数据行数", stats['processed_rows']],
            ["", ""],
            ["物料统计", ""],
            ["合并物料种类数", stats['total_materials']],
            ["存在问题的物料数", stats['materials_with_issues']],
            ["不一致问题总数", stats['total_inconsistencies']],
            ["", ""],
            ["数据源文件信息", ""],
            ["有效文件总数", len(stats.get('processed_files_info', []))],
            ["", ""]
        ]
        # Per-file detail lines.
        files_info = stats.get('processed_files_info', [])
        for i, file_info in enumerate(files_info, 1):
            summary_data.extend([
                [f"数据源文件 {i}", file_info.filename],
                [" Sheet名称", file_info.sheet_name],
                [" 起始行", file_info.start_row + 1],  # convert to 1-based
                [" 总行数", file_info.total_rows],
                [" 有效行数", file_info.valid_rows],
                ["", ""]
            ])
        summary_data.extend([
            ["", ""],
            ["文件信息", ""],
            ["输出文件夹", os.path.join(self.output_folder, "BOM_Merge_out")],
            ["报告文件", stats.get('output_filename', '')],
            ["合并Sheet名称", "BOM_Merge"]
        ])
        return pd.DataFrame(summary_data, columns=["项目", "数值"])

    def _create_data_source_sheet(self, stats: Dict[str, Any]) -> pd.DataFrame:
        """Build the '数据源文件' sheet content."""
        files_info = stats.get('processed_files_info', [])
        if not files_info:
            return pd.DataFrame([["无有效数据源文件", ""]], columns=["状态", "说明"])
        data_source_data = [
            {
                '序号': i,
                '文件名': file_info.filename,
                'Sheet名称': file_info.sheet_name,
                '数据起始行': file_info.start_row + 1,  # convert to 1-based
                '总行数': file_info.total_rows,
                '有效行数': file_info.valid_rows,
                '处理状态': '成功'
            }
            for i, file_info in enumerate(files_info, 1)
        ]
        return pd.DataFrame(data_source_data)

    def _create_merge_sheet(self, consolidator: MaterialConsolidator) -> pd.DataFrame:
        """Build the 'BOM_Merge' sheet: one row per merged material."""
        report_data = []
        file_columns = sorted(consolidator.file_quantities.keys())
        for material in consolidator.materials.values():
            row = {
                'Partnumber': material.partnumber,
                'Purchase_Code': material.purchase_code,
                'MF_PN': material.mf_pn,
                'Description': material.description,
                'Part Type': material.part_type,
                'MF_NAME': material.mf_name,
                'PCB_Footprint': material.pcb_footprint,
                '检查信息': '; '.join(material.inconsistencies) if material.inconsistencies else '一致'
            }
            # One quantity column per source file, then the grand total.
            for file in file_columns:
                row[file] = material.quantity_data.get(file, 0)
            row['合计'] = material.total_quantity
            report_data.append(row)
        return pd.DataFrame(report_data)

    @staticmethod
    def _auto_fit_columns(sheet, max_width: int) -> None:
        """Widen each column to its longest cell value, capped at max_width."""
        for col in sheet.columns:
            column = col[0].column_letter
            longest = max((len(str(cell.value)) for cell in col
                           if cell.value is not None), default=0)
            sheet.column_dimensions[column].width = min(longest + 2, max_width)

    def generate_consolidated_report(self, consolidator: MaterialConsolidator,
                                     stats: Dict[str, Any]) -> Optional[str]:
        """Write the three-sheet report; return its path or None on failure."""
        if not consolidator.materials:
            return None

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"BOM合并报告_{timestamp}.xlsx"
        output_path = os.path.join(self.output_folder, "BOM_Merge_out", output_filename)
        # Fixed: record the filename BEFORE building the summary sheet;
        # previously it was set after writing, so the '报告文件' cell in
        # the summary was always empty.
        stats['output_filename'] = output_filename

        try:
            with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
                self._create_summary_sheet(stats).to_excel(
                    writer, sheet_name='汇总信息', index=False)
                self._create_data_source_sheet(stats).to_excel(
                    writer, sheet_name='数据源文件', index=False)
                self._create_merge_sheet(consolidator).to_excel(
                    writer, sheet_name='BOM_Merge', index=False)

                workbook = writer.book
                # Summary sheet uses fixed widths; the others auto-fit.
                summary_sheet = workbook['汇总信息']
                summary_sheet.column_dimensions['A'].width = 25
                summary_sheet.column_dimensions['B'].width = 40
                self._auto_fit_columns(workbook['数据源文件'], 30)
                self._auto_fit_columns(workbook['BOM_Merge'], 50)

            return output_path
        except Exception as e:
            print(f"保存报告失败: {e}")
            return None


class BOMProcessor:
    """Top-level controller wiring parser, consolidator and reporting."""

    def __init__(self):
        self.file_parser = BOMFileParser()
        self.material_consolidator = MaterialConsolidator()
        self.report_generator: Optional[ReportGenerator] = None
        # Run statistics.
        self.processed_files = 0
        self.processed_rows = 0
        self.total_files = 0

    def set_output_folder(self, folder_path: str) -> None:
        """Point report generation at (a subfolder of) folder_path."""
        self.report_generator = ReportGenerator(folder_path)

    def process_folder(self, folder_path: str) -> bool:
        """Process every .xlsx in the folder; True if any file succeeded."""
        # Skip Excel lock files ("~$foo.xlsx"), which are not real workbooks.
        bom_files = [p for p in glob.glob(os.path.join(folder_path, "*.xlsx"))
                     if not os.path.basename(p).startswith('~$')]
        self.total_files = len(bom_files)
        if not bom_files:
            return False
        successful_files = sum(
            1 for file_path in bom_files if self._process_single_file(file_path))
        self.processed_files = successful_files
        return successful_files > 0

    def _process_single_file(self, file_path: str) -> bool:
        """Parse one file and feed its rows into the consolidator."""
        filename = os.path.basename(file_path)
        # Fixed: these messages printed a literal "(unknown)" instead of
        # the computed filename.
        print(f"处理文件: {filename}...")
        result = self.file_parser.parse_file(file_path)
        if not result:
            print(f" ! 无法解析文件: {filename}")
            return False
        bom_rows, file_info = result
        print(f" √ 文件{filename}找到 {len(bom_rows)} 行有效数据 (Sheet: {file_info.sheet_name})")
        self.material_consolidator.add_file_info(file_info)
        for bom_row in bom_rows:
            self.material_consolidator.add_bom_row(bom_row)
            self.processed_rows += 1
        return True

    def generate_report(self) -> Optional[Dict[str, Any]]:
        """Write the report and return the full stats dict, or None."""
        if not self.report_generator:
            return None
        base_stats = self.material_consolidator.get_statistics()
        base_stats.update({
            'processed_files': self.processed_files,
            'total_files': self.total_files,
            'processed_rows': self.processed_rows
        })
        output_path = self.report_generator.generate_consolidated_report(
            self.material_consolidator, base_stats
        )
        if not output_path:
            return None
        base_stats['output_path'] = output_path
        return base_stats


class UserInterface:
    """Console / Tk interaction helpers."""

    @staticmethod
    def select_folder(title: str = "选择文件夹") -> str:
        """Ask the user to pick a folder via a Tk dialog; '' if cancelled."""
        root = tk.Tk()
        root.withdraw()  # hide the empty Tk main window
        folder_path = filedialog.askdirectory(title=title)
        root.destroy()
        return folder_path
@staticmethod def print_summary(stats: Dict[str, Any], folder_path: str): """打印汇总信息""" print("\n" + "=" * 60) print("BOM合并检查完成!") print("=" * 60) print(f"处理文件夹: {folder_path}") print(f"扫描文件数: {stats['total_files']}") print(f"成功处理文件数: {stats['processed_files']}") print(f"处理数据行数: {stats['processed_rows']}") print(f"合并物料种类数: {stats['total_materials']}") print(f"存在问题的物料数: {stats['materials_with_issues']}") print(f"不一致问题总数: {stats['total_inconsistencies']}") # 显示数据源文件信息 files_info = stats.get('processed_files_info', []) print(f"有效数据源文件数: {len(files_info)}") for file_info in files_info: print(f" - {file_info.filename} (Sheet: {file_info.sheet_name}, 有效行: {file_info.valid_rows})") print(f"报告文件: {stats['output_path']}") print("=" * 60) # 额外显示输出文件夹信息 output_dir = os.path.join(folder_path, "BOM_Merge_out") print(f"输出保存在: {output_dir}") print("\n报告包含三个Sheet:") print("1. '汇总信息' - 处理统计和汇总信息") print("2. '数据源文件' - 有效数据源文件详细信息") print("3. 'BOM_Merge' - 合并后的物料数据") def main(): """主函数""" # 初始化处理器 bom_processor = BOMProcessor() # 选择文件夹 folder_path = UserInterface.select_folder("选择包含BOM文件的文件夹") if not folder_path: print("未选择文件夹,程序退出") return bom_processor.set_output_folder(folder_path) # 处理文件 print(f"开始处理文件夹: {folder_path}") success = bom_processor.process_folder(folder_path) if not success: print("没有找到可处理的BOM文件") return # 生成报告 print("\n生成合并报告...") stats = bom_processor.generate_report() if stats: UserInterface.print_summary(stats, folder_path) else: print("生成报告失败") if __name__ == "__main__": main() input("\n按任意键退出...")