Erlo

Python实现对比两个Excel表某个范围的内容并提取出差异

2025-09-17 02:29:11 发布   97 浏览  
页面报错/反馈
收藏 点赞

# pip install openpyxl
from openpyxl import load_workbook, Workbook
from openpyxl.utils.cell import coordinate_from_string, column_index_from_string, get_column_letter
from openpyxl.styles import Font, PatternFill


def _normalize_range(start_cell: str, end_cell: str) -> str:
    """把两个单元格规范化成合法的范围字符串,例如 'A2', 'A10' -> 'A2:A10'"""
    col1, row1 = coordinate_from_string(start_cell)
    col2, row2 = coordinate_from_string(end_cell)
    c1 = column_index_from_string(col1)
    c2 = column_index_from_string(col2)

    min_col = min(c1, c2)
    max_col = max(c1, c2)
    min_row = min(row1, row2)
    max_row = max(row1, row2)

    return f"{get_column_letter(min_col)}{min_row}:{get_column_letter(max_col)}{max_row}"


def _read_range_values(ws, ranges):
    """
    从 worksheet 中的多个范围读取值。
    返回 (原始值, 规范化值) 的列表。
    - 规范化值:去掉前后空格,字符串转小写。
    """
    values = []
    for start_cell, end_cell in ranges:
        range_str = _normalize_range(start_cell, end_cell)
        cells = ws[range_str]
        for row in cells:
            for cell in row:
                if cell.value is not None:
                    raw = cell.value
                    if isinstance(raw, str):
                        norm = raw.strip().lower()  # 忽略大小写 + 去掉前后空格
                    else:
                        norm = raw
                    values.append((raw, norm))
    return values


def _ordered_set_difference(list_a, list_b):
    """
    返回 list_a 独有的元素(基于规范化值比较)。
    保持顺序 + 去重,输出原始值。
    """
    set_b_norm = {norm for _, norm in list_b}
    seen_norm = set()
    result = []
    for raw, norm in list_a:
        if norm in seen_norm:
            continue
        if norm not in set_b_norm:
            result.append(raw)
        seen_norm.add(norm)
    return result


def compare_excel_columns(
    file1, ranges1, file2, ranges2,
    output_file="result.xlsx",
    sheet_name1=None, sheet_name2=None
):
    """
    比较两个 Excel 文件的指定范围,输出差异。
    参数:
      - file1, file2: 文件路径
      - ranges1, ranges2: 列表,每个元素是 (start_cell, end_cell)
      - sheet_name1, sheet_name2: 工作表名称(不传则用 active)
      - output_file: 输出文件名
    """

    # 读取文件1
    wb1 = load_workbook(file1, data_only=True)
    ws1 = wb1[sheet_name1] if sheet_name1 else wb1.active
    vals1 = _read_range_values(ws1, ranges1)

    # 读取文件2
    wb2 = load_workbook(file2, data_only=True)
    ws2 = wb2[sheet_name2] if sheet_name2 else wb2.active
    vals2 = _read_range_values(ws2, ranges2)

    # 求差异
    only_in_file1 = _ordered_set_difference(vals1, vals2)
    only_in_file2 = _ordered_set_difference(vals2, vals1)

    # 输出结果
    wb_out = Workbook()
    ws_out = wb_out.active
    ws_out.title = "Comparison Result"

    # 表头
    ws_out["A1"] = "Only in file1"
    ws_out["B1"] = "Only in file2"
    ws_out["A1"].font = ws_out["B1"].font = Font(bold=True, color="FFFFFF")
    ws_out["A1"].fill = ws_out["B1"].fill = PatternFill("solid", fgColor="4F81BD")

    max_rows = max(len(only_in_file1), len(only_in_file2))
    for i in range(max_rows):
        if i 

登录查看全部

参与评论

评论留言

还没有评论留言,赶紧来抢楼吧~~

手机查看

返回顶部

给这篇文章打个标签吧~

棒极了 糟糕透顶 好文章 PHP JAVA JS 小程序 Python SEO MySql 确认