Python脚本开发文件初始化

This commit is contained in:
2026-02-02 15:19:30 +08:00
parent 86c4718368
commit 5c846eae94
25 changed files with 8746 additions and 0 deletions

View File

@@ -0,0 +1,618 @@
import pandas as pd
import os
import glob
import re
from datetime import datetime
import tkinter as tk
from tkinter import filedialog
from collections import defaultdict
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
@dataclass
class ProcessedFileInfo:
    """Bookkeeping record describing one workbook that was parsed."""

    # Base name of the source workbook.
    filename: str
    # Sheet in which the BOM table was located.
    sheet_name: str
    # Index of the header row inside that sheet (the report shows it as +1).
    start_row: int
    # Number of rows read below the header.
    total_rows: int
    # Number of rows that yielded a usable BOM entry.
    valid_rows: int
@dataclass
class BOMRow:
    """One component line read from a BOM sheet.

    Text fields are stored stripped. Missing spreadsheet cells (``None`` /
    NaN) are stored as empty strings rather than the text ``'nan'`` so that
    the ``partnumber`` -> ``mf_pn`` fallback in :meth:`get_key` works.
    """

    partnumber: str
    purchase_code: str
    mf_pn: str
    description: str
    part_type: str
    mf_name: str
    pcb_footprint: str
    quantity: int
    reference: str
    filename: str = ""
    sheet_name: str = ""

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return *value* as stripped text, mapping missing cells to ``''``."""
        if value is None:
            return ""
        try:
            if pd.isna(value):
                return ""
        except (TypeError, ValueError):
            # Array-like value: pd.isna() is element-wise there and has no
            # single truth value, so it is not a missing scalar.
            pass
        return str(value).strip()

    @classmethod
    def from_dataframe_row(cls, row: pd.Series, filename: str = "", sheet_name: str = "") -> Optional['BOMRow']:
        """Build a BOMRow from a DataFrame row (or any mapping with ``get``).

        Args:
            row: Source row; columns are looked up by their cleaned names.
            filename: Name of the workbook the row came from.
            sheet_name: Name of the sheet the row came from.

        Returns:
            The new BOMRow, or ``None`` when ``Quantity`` cannot be converted
            to ``int`` (this includes a blank quantity cell).
        """
        text = cls._clean_text
        try:
            quantity = int(row.get('Quantity', 0))
        except (ValueError, TypeError):
            return None
        return cls(
            partnumber=text(row.get('Partnumber', '')),
            purchase_code=text(row.get('Purchase_Code', '')),
            mf_pn=text(row.get('MF_PN', '')),
            description=text(row.get('Description', '')),
            part_type=text(row.get('Part_Type', '')),
            mf_name=text(row.get('MF_NAME', '')),
            pcb_footprint=text(row.get('PCB_Footprint', '')),
            quantity=quantity,
            reference=text(row.get('Reference', '')),
            filename=filename,
            sheet_name=sheet_name,
        )

    def get_key(self) -> str:
        """Return the merge key: the part number, else the manufacturer P/N."""
        return self.partnumber if self.partnumber else self.mf_pn

    def is_valid(self) -> bool:
        """Return True when the row has a non-empty merge key."""
        return bool(self.get_key())
@dataclass
class ConsolidatedMaterial:
    """A single material aggregated across the processed BOM files."""

    partnumber: str
    purchase_code: str
    mf_pn: str
    description: str
    part_type: str
    mf_name: str
    pcb_footprint: str
    # Quantity keyed by source file name.
    quantity_data: Dict[str, int]
    # Human-readable descriptions of every problem found for this material.
    inconsistencies: List[str]

    @property
    def total_quantity(self) -> int:
        """Quantity summed over all source files."""
        running_total = 0
        for per_file_qty in self.quantity_data.values():
            running_total += per_file_qty
        return running_total

    @property
    def has_inconsistencies(self) -> bool:
        """True when at least one problem has been recorded."""
        return bool(self.inconsistencies)
class ConsistencyChecker:
    """Detects conflicting attribute values and quantity/reference mismatches."""

    # Lower-cased cell texts that carry no information and are never compared.
    _BLANK_VALUES = frozenset({'', 'nan', 'none', 'null'})

    # Report label -> attribute name shared by the merged material and the row.
    _FIELD_ATTRIBUTES = {
        'Purchase_Code': 'purchase_code',
        'MF_PN': 'mf_pn',
        'Part_Type': 'part_type',
        'MF_NAME': 'mf_name',
        'PCB_Footprint': 'pcb_footprint',
    }

    def __init__(self):
        self.fields_to_check = ['Purchase_Code', 'MF_PN', 'Part_Type', 'MF_NAME', 'PCB_Footprint']

    @classmethod
    def _is_blank(cls, value: str) -> bool:
        """Return True for empty text and for placeholder texts such as 'nan'."""
        return not value or value.strip().lower() in cls._BLANK_VALUES

    def check_field_consistency(self, existing: "ConsolidatedMaterial", new_row: "BOMRow") -> List[str]:
        """Compare a new row with the already merged material.

        Args:
            existing: Material merged so far for the row's key.
            new_row: Row that is about to be merged into ``existing``.

        Returns:
            One message per field whose two non-blank values differ; an empty
            list when everything agrees.
        """
        inconsistencies = []
        for field, attr_name in self._FIELD_ATTRIBUTES.items():
            existing_val = getattr(existing, attr_name)
            new_val = getattr(new_row, attr_name)
            if self._should_check_field(existing_val, new_val) and existing_val != new_val:
                # The two values are separated by " ≠ ", matching the
                # quantity/reference message below.
                inconsistencies.append(
                    f"{field}不一致: {existing_val} ≠ {new_val} (文件: {new_row.filename}, Sheet: {new_row.sheet_name})"
                )
        return inconsistencies

    def check_quantity_reference(self, row: "BOMRow") -> Optional[str]:
        """Check that the number of reference designators equals Quantity.

        Designators are split on ``','``; empty fragments are ignored.

        Returns:
            A mismatch message, or ``None`` when the counts agree or the row
            has no reference text at all.
        """
        reference = row.reference
        # A blank cell may arrive as the text 'nan'; that is not a designator.
        if self._is_blank(reference):
            return None
        ref_count = sum(1 for ref in reference.split(',') if ref.strip())
        if ref_count != row.quantity:
            return f"Reference数量不符: {ref_count}个位置 ≠ Quantity={row.quantity} (文件: {row.filename}, Sheet: {row.sheet_name})"
        return None

    def _should_check_field(self, existing_val: str, new_val: str) -> bool:
        """Return True only when both values are meaningful.

        A blank on either side is skipped, so the outcome does not depend on
        which file happened to be processed first.
        """
        return not (self._is_blank(existing_val) or self._is_blank(new_val))
class BOMFileParser:
    """Finds the BOM table inside an Excel workbook and converts it to BOMRow objects."""

    def __init__(self):
        # Labels that must all occur in a row for it to count as the header row.
        self.required_headers = ['Item', 'Partnumber', 'Purchase_Code', 'MF_PN']
        # Column names (after clean-up) a sheet must provide to be parsed.
        self.required_columns = ['Partnumber', 'Purchase_Code', 'MF_PN', 'Description',
                                 'Part_Type', 'MF_NAME', 'PCB_Footprint', 'Quantity', 'Reference']

    def find_valid_sheet(self, file_path: str, max_header_rows: int = 10) -> Tuple[Optional[str], Optional[int]]:
        """Locate the sheet and header row that hold the BOM table.

        A row qualifies when every entry of ``required_headers`` occurs as a
        substring of the row's text representation.

        Args:
            file_path: Path of the workbook to inspect.
            max_header_rows: How many leading rows of each sheet are examined.

        Returns:
            ``(sheet_name, header_row_index)`` for the first match, or
            ``(None, None)`` when nothing matches or the file cannot be read.
        """
        try:
            # Open the workbook once and let the context manager close the
            # handle again, instead of re-opening the file for every sheet.
            with pd.ExcelFile(file_path) as xl:
                for sheet_name in xl.sheet_names:
                    df = xl.parse(sheet_name, header=None)
                    for i in range(min(len(df), max_header_rows)):
                        row_text = str(df.iloc[i].values)
                        if all(col in row_text for col in self.required_headers):
                            filename = os.path.basename(file_path)
                            print(f"文件{filename}找到有效sheet {sheet_name}|有效数据行从 {i} 开始。")
                            return sheet_name, i
        except Exception as e:
            # Best effort: an unreadable workbook is reported and skipped.
            print(f"读取文件 {file_path} 时出错: {e}")
        return None, None

    def parse_file(self, file_path: str) -> Optional[Tuple[List["BOMRow"], "ProcessedFileInfo"]]:
        """Parse one workbook into BOM rows plus a processing record.

        Returns:
            ``(rows, file_info)`` on success. ``None`` when no BOM sheet is
            found, a required column is missing, or reading fails.
        """
        filename = os.path.basename(file_path)
        sheet_name, header_row = self.find_valid_sheet(file_path)
        if not sheet_name:
            return None
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
            # Counted before cleaning, so fully empty rows are included.
            total_rows = len(df)
            df = self._clean_dataframe(df)
            missing_cols = self._missing_columns(df)
            if missing_cols:
                print(f"文件 {filename} 缺少必要列: {', '.join(missing_cols)}")
                return None
            bom_rows = []
            for _, row_data in df.iterrows():
                bom_row = BOMRow.from_dataframe_row(row_data, filename, sheet_name)
                if bom_row and bom_row.is_valid():
                    bom_rows.append(bom_row)
            file_info = ProcessedFileInfo(
                filename=filename,
                sheet_name=sheet_name,
                start_row=header_row,
                total_rows=total_rows,
                valid_rows=len(bom_rows),
            )
            return bom_rows, file_info
        except Exception as e:
            print(f"解析文件 {file_path} 时出错: {e}")
            return None

    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *df* without empty rows and with normalised column names.

        Names are stripped, whitespace runs become ``_`` and every character
        outside ``[a-zA-Z0-9_]`` is removed. The input frame is left untouched.
        """
        cleaned = df.dropna(how='all')
        # Cast to str first so numeric header cells do not break the .str accessor.
        names = cleaned.columns.astype(str).str.strip()
        names = names.str.replace(r'\s+', '_', regex=True)
        cleaned.columns = names.str.replace(r'[^a-zA-Z0-9_]', '', regex=True)
        return cleaned

    def _missing_columns(self, df: pd.DataFrame) -> List[str]:
        """Return the required column names that *df* does not contain."""
        return [col for col in self.required_columns if col not in df.columns]

    def _validate_columns(self, df: pd.DataFrame) -> bool:
        """Return True when every required column is present in *df*."""
        return not self._missing_columns(df)
class MaterialConsolidator:
    """Merges BOM rows from several files into one entry per material key."""

    def __init__(self):
        # Merged materials keyed by BOMRow.get_key().
        self.materials: Dict[str, ConsolidatedMaterial] = {}
        self.consistency_checker = ConsistencyChecker()
        # file name -> {material key -> quantity in that file}
        self.file_quantities: Dict[str, Dict[str, int]] = defaultdict(dict)
        self.processed_files_info: List[ProcessedFileInfo] = []

    def add_bom_row(self, bom_row: "BOMRow") -> None:
        """Merge one BOM row into the consolidated data.

        The first row seen for a key supplies the material's descriptive
        fields; later rows are compared against it and differences are
        recorded. Quantities are accumulated per file, so a part listed on
        several rows of the same file contributes the sum of those rows.
        """
        key = bom_row.get_key()
        material = self.materials.get(key)
        if material is None:
            material = ConsolidatedMaterial(
                partnumber=bom_row.partnumber,
                purchase_code=bom_row.purchase_code,
                mf_pn=bom_row.mf_pn,
                description=bom_row.description,
                part_type=bom_row.part_type,
                mf_name=bom_row.mf_name,
                pcb_footprint=bom_row.pcb_footprint,
                quantity_data={},
                inconsistencies=[],
            )
            self.materials[key] = material
        else:
            # A freshly created material equals its own row, so only rows
            # merged into an existing material need the field comparison.
            material.inconsistencies.extend(
                self.consistency_checker.check_field_consistency(material, bom_row)
            )

        ref_inconsistency = self.consistency_checker.check_quantity_reference(bom_row)
        if ref_inconsistency:
            material.inconsistencies.append(ref_inconsistency)

        # Accumulate instead of overwrite: repeated rows for the same part in
        # one file must not discard the earlier quantity.
        file_total = material.quantity_data.get(bom_row.filename, 0) + bom_row.quantity
        material.quantity_data[bom_row.filename] = file_total
        self.file_quantities[bom_row.filename][key] = file_total

    def add_file_info(self, file_info: "ProcessedFileInfo") -> None:
        """Remember the processing record of a successfully parsed file."""
        self.processed_files_info.append(file_info)

    def get_statistics(self) -> Dict[str, Any]:
        """Return counts of materials, problems and files plus the file records."""
        materials = self.materials.values()
        total_inconsistencies = sum(len(mat.inconsistencies) for mat in materials)
        materials_with_issues = sum(1 for mat in materials if mat.has_inconsistencies)
        return {
            'total_materials': len(self.materials),
            'total_inconsistencies': total_inconsistencies,
            'materials_with_issues': materials_with_issues,
            'file_count': len(self.file_quantities),
            'processed_files_info': self.processed_files_info,
        }
class ReportGenerator:
    """Writes the consolidated BOM data to a three-sheet Excel workbook."""

    def __init__(self, output_folder: str):
        self.output_folder = output_folder
        self._ensure_output_directory()

    def _ensure_output_directory(self):
        """Create the ``BOM_Merge_out`` sub-folder if it does not exist yet."""
        output_dir = os.path.join(self.output_folder, "BOM_Merge_out")
        os.makedirs(output_dir, exist_ok=True)

    def _create_summary_sheet(self, stats: Dict[str, Any]) -> pd.DataFrame:
        """Build the two-column summary sheet from the statistics dict.

        ``stats`` must contain the processing and material counters;
        ``processed_files_info`` and ``output_filename`` are optional.
        """
        files_info = stats.get('processed_files_info', [])
        summary_data = [
            ["BOM合并检查汇总报告", ""],
            ["生成时间", datetime.now().strftime("%Y-%m-%d %H:%M:%S")],
            ["", ""],
            ["处理统计", ""],
            ["扫描文件总数", stats['total_files']],
            ["成功处理文件数", stats['processed_files']],
            ["处理数据行数", stats['processed_rows']],
            ["", ""],
            ["物料统计", ""],
            ["合并物料种类数", stats['total_materials']],
            ["存在问题的物料数", stats['materials_with_issues']],
            ["不一致问题总数", stats['total_inconsistencies']],
            ["", ""],
            ["数据源文件信息", ""],
            ["有效文件总数", len(files_info)],
            ["", ""],
        ]
        # One detail group per source file.
        for i, file_info in enumerate(files_info, 1):
            summary_data.extend([
                [f"数据源文件 {i}", file_info.filename],
                [" Sheet名称", file_info.sheet_name],
                [" 起始行", file_info.start_row + 1],  # shown 1-based
                [" 总行数", file_info.total_rows],
                [" 有效行数", file_info.valid_rows],
                ["", ""],
            ])
        summary_data.extend([
            ["", ""],
            ["文件信息", ""],
            ["输出文件夹", os.path.join(self.output_folder, "BOM_Merge_out")],
            ["报告文件", stats.get('output_filename', '')],
            ["合并Sheet名称", "BOM_Merge"],
        ])
        return pd.DataFrame(summary_data, columns=["项目", "数值"])

    def _create_data_source_sheet(self, stats: Dict[str, Any]) -> pd.DataFrame:
        """Build the per-file detail sheet, or a one-row placeholder when empty."""
        files_info = stats.get('processed_files_info', [])
        if not files_info:
            return pd.DataFrame([["无有效数据源文件", ""]], columns=["状态", "说明"])
        return pd.DataFrame([
            {
                '序号': i,
                '文件名': file_info.filename,
                'Sheet名称': file_info.sheet_name,
                '数据起始行': file_info.start_row + 1,  # shown 1-based
                '总行数': file_info.total_rows,
                '有效行数': file_info.valid_rows,
                '处理状态': '成功',
            }
            for i, file_info in enumerate(files_info, 1)
        ])

    def _create_merge_sheet(self, consolidator: "MaterialConsolidator") -> pd.DataFrame:
        """Build the merged sheet: one row per material, one quantity column per file."""
        report_data = []
        file_columns = sorted(consolidator.file_quantities.keys())
        for material in consolidator.materials.values():
            row = {
                'Partnumber': material.partnumber,
                'Purchase_Code': material.purchase_code,
                'MF_PN': material.mf_pn,
                'Description': material.description,
                'Part Type': material.part_type,
                'MF_NAME': material.mf_name,
                'PCB_Footprint': material.pcb_footprint,
                '检查信息': '; '.join(material.inconsistencies) if material.inconsistencies else '一致',
            }
            # A file that does not contain the material contributes 0.
            for file in file_columns:
                row[file] = material.quantity_data.get(file, 0)
            row['合计'] = material.total_quantity
            report_data.append(row)
        return pd.DataFrame(report_data)

    @staticmethod
    def _autofit_columns(sheet, max_width: int) -> None:
        """Size each column of an openpyxl sheet to its longest cell text, capped at *max_width*."""
        for col in sheet.columns:
            longest = max((len(str(cell.value)) for cell in col), default=0)
            sheet.column_dimensions[col[0].column_letter].width = min(longest + 2, max_width)

    def generate_consolidated_report(self, consolidator: "MaterialConsolidator", stats: Dict[str, Any]) -> Optional[str]:
        """Write the report workbook.

        Args:
            consolidator: Source of the merged materials.
            stats: Statistics dict; on success it gains ``output_filename``.

        Returns:
            Path of the written workbook, or ``None`` when there is nothing to
            report or writing fails.
        """
        if not consolidator.materials:
            return None
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_filename = f"BOM合并报告_{timestamp}.xlsx"
        output_path = os.path.join(self.output_folder, "BOM_Merge_out", output_filename)
        # Publish the file name before the summary sheet is built; otherwise
        # its "报告文件" row is always written empty.
        stats['output_filename'] = output_filename
        try:
            with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
                self._create_summary_sheet(stats).to_excel(writer, sheet_name='汇总信息', index=False)
                self._create_data_source_sheet(stats).to_excel(writer, sheet_name='数据源文件', index=False)
                self._create_merge_sheet(consolidator).to_excel(writer, sheet_name='BOM_Merge', index=False)

                workbook = writer.book
                summary_sheet = workbook['汇总信息']
                summary_sheet.column_dimensions['A'].width = 25
                summary_sheet.column_dimensions['B'].width = 40
                self._autofit_columns(workbook['数据源文件'], 30)
                self._autofit_columns(workbook['BOM_Merge'], 50)
            return output_path
        except Exception as e:
            # Keep stats unchanged when no report was produced.
            stats.pop('output_filename', None)
            print(f"保存报告失败: {e}")
            return None
class BOMProcessor:
    """Top-level controller: parses every workbook of a folder and produces the report."""

    def __init__(self):
        self.file_parser = BOMFileParser()
        self.material_consolidator = MaterialConsolidator()
        self.report_generator: Optional[ReportGenerator] = None
        # Running counters that end up in the report.
        self.processed_files = 0
        self.processed_rows = 0
        self.total_files = 0

    def set_output_folder(self, folder_path: str):
        """Create the report generator that writes below *folder_path*."""
        self.report_generator = ReportGenerator(folder_path)

    def process_folder(self, folder_path: str) -> bool:
        """Process every ``*.xlsx`` workbook directly inside *folder_path*.

        Excel owner/lock files (names starting with ``~$``, present while a
        workbook is open in Excel) are ignored. Files are handled in sorted
        order so that results do not depend on the directory listing order.

        Returns:
            True when at least one file was parsed successfully.
        """
        candidates = glob.glob(os.path.join(folder_path, "*.xlsx"))
        bom_files = sorted(
            path for path in candidates
            if not os.path.basename(path).startswith("~$")
        )
        self.total_files = len(bom_files)
        if not bom_files:
            return False
        successful_files = 0
        for file_path in bom_files:
            if self._process_single_file(file_path):
                successful_files += 1
        self.processed_files = successful_files
        return successful_files > 0

    def _process_single_file(self, file_path: str) -> bool:
        """Parse one workbook and feed its rows to the consolidator.

        Returns:
            True when the file could be parsed, False otherwise.
        """
        filename = os.path.basename(file_path)
        print(f"处理文件: {filename}...")
        result = self.file_parser.parse_file(file_path)
        if not result:
            print(f" ! 无法解析文件: {filename}")
            return False
        bom_rows, file_info = result
        print(f" √ 文件{filename}找到 {len(bom_rows)} 行有效数据 (Sheet: {file_info.sheet_name})")
        self.material_consolidator.add_file_info(file_info)
        for bom_row in bom_rows:
            self.material_consolidator.add_bom_row(bom_row)
            self.processed_rows += 1
        return True

    def generate_report(self) -> Optional[Dict[str, Any]]:
        """Write the report and return the statistics that describe it.

        Returns:
            The statistics dict including ``output_path``, or ``None`` when no
            output folder was set or the report could not be written.
        """
        if not self.report_generator:
            return None
        base_stats = self.material_consolidator.get_statistics()
        base_stats.update({
            'processed_files': self.processed_files,
            'total_files': self.total_files,
            'processed_rows': self.processed_rows,
        })
        output_path = self.report_generator.generate_consolidated_report(
            self.material_consolidator, base_stats
        )
        if not output_path:
            return None
        base_stats['output_path'] = output_path
        return base_stats
class UserInterface:
    """Console and dialog helpers used by the script entry point."""

    @staticmethod
    def select_folder(title: str = "选择文件夹") -> str:
        """Show a directory chooser and return the chosen path."""
        # A hidden root window is needed to host the dialog.
        hidden_root = tk.Tk()
        hidden_root.withdraw()
        chosen = filedialog.askdirectory(title=title)
        hidden_root.destroy()
        return chosen

    @staticmethod
    def print_summary(stats: Dict[str, Any], folder_path: str):
        """Print the run summary held in *stats* to the console."""
        rule = "=" * 60
        print("\n" + rule)
        print("BOM合并检查完成!")
        print(rule)
        print(f"处理文件夹: {folder_path}")

        # Counter lines share one layout: "<label>: <value>".
        counter_lines = (
            ("扫描文件数", 'total_files'),
            ("成功处理文件数", 'processed_files'),
            ("处理数据行数", 'processed_rows'),
            ("合并物料种类数", 'total_materials'),
            ("存在问题的物料数", 'materials_with_issues'),
            ("不一致问题总数", 'total_inconsistencies'),
        )
        for label, stat_key in counter_lines:
            print(f"{label}: {stats[stat_key]}")

        # Source files that were parsed successfully.
        sources = stats.get('processed_files_info', [])
        print(f"有效数据源文件数: {len(sources)}")
        for source in sources:
            print(f" - {source.filename} (Sheet: {source.sheet_name}, 有效行: {source.valid_rows})")
        print(f"报告文件: {stats['output_path']}")
        print(rule)

        # Where the report was saved and what it contains.
        print(f"输出保存在: {os.path.join(folder_path, 'BOM_Merge_out')}")
        print("\n报告包含三个Sheet:")
        for sheet_line in (
            "1. '汇总信息' - 处理统计和汇总信息",
            "2. '数据源文件' - 有效数据源文件详细信息",
            "3. 'BOM_Merge' - 合并后的物料数据",
        ):
            print(sheet_line)
def main():
    """Interactive entry point: choose a folder, merge its BOM files, print the summary."""
    processor = BOMProcessor()

    chosen_folder = UserInterface.select_folder("选择包含BOM文件的文件夹")
    if not chosen_folder:
        print("未选择文件夹,程序退出")
        return
    processor.set_output_folder(chosen_folder)

    # Parse every workbook; stop when none of them could be used.
    print(f"开始处理文件夹: {chosen_folder}")
    if not processor.process_folder(chosen_folder):
        print("没有找到可处理的BOM文件")
        return

    # Write the report and show the outcome.
    print("\n生成合并报告...")
    report_stats = processor.generate_report()
    if not report_stats:
        print("生成报告失败")
        return
    UserInterface.print_summary(report_stats, chosen_folder)
# Run the tool only when the file is executed as a script.
if __name__ == "__main__":
    main()
    # Wait for Enter before the process exits (presumably to keep a console
    # window open long enough to read the output -- confirm).
    input("\n按任意键退出...")