SoFunction
Updated on 2024-12-14

Python-based implementation of a tool for comparing Excel files

Purpose: To design an application GUI for comparing two Excel files.

Approach

1. Parameters

  • Input: a single Excel file containing two sheets, ODS (old data) and DWH (new data)
  • Generate Comparison File
  • Two primary keys are supported: enter primary key 1 and primary key 2

(Defaults to the same column names for old and new documents)

2. Effects

  • Documents generated
  • If both sheets have the same number of rows: for each field, take the first 10 rows whose values are inconsistent
  • If the row counts differ: take the first 10 keys that exist on only one side (for each side), exclude those unmatched rows, and then, for each field, take the first 10 rows whose values are inconsistent

3. Implementation

  • Loop over the columns, comparing a combined column (primary key + compared column) each time
  • pandas computes the difference data; openpyxl handles the formatting of the generated sheet. (Generate the data first, then adjust the format.)

Configuration

import pandas as pd
from openpyxl import load_workbook
# Workbook to compare. It must contain the old data on sheet "ODS" and the
# new data on sheet "DWH". Both sheets are read from this single path so the
# location only has to be changed in one place.
# NOTE(review): the user folder is spelled "Small Pipe Student" here but
# "Small Pipe Students" in TargetPath - confirm which one exists on disk.
path = r"C:\Users\Small Pipe Student\Desktop\Migration_Data_Comparison_Tool\Comparison_File.xls"  # input("Select file path:")
# Workbook that receives the comparison report.
TargetPath = r"C:\Users\Small Pipe Students\Desktop\Migration_Data_Comparison_Tool\Target_File\Comparison_Results.xlsx"
DATA_ODS = pd.read_excel(path, sheet_name="ODS")
DATA_DWH = pd.read_excel(path, sheet_name="DWH")
# Column used as the primary key when matching rows between the two sheets.
Primarykey = "Employee number"  # input("Select primary key 1:")

I. Volume of data

Output Form 1 - Data Volume

def write_to_excel_DataVolume(Data, TargetPath):
    """Write the data-volume table to sheet ``Sheet1`` of a workbook at ``TargetPath``.

    The workbook is written with the ``xlsxwriter`` engine. ``Data`` is placed
    without its column header starting at the third sheet row; a merged title
    row, a formatted header row ('', 'ODS', 'DWH') and a merged
    'Difference in data volume' caption on the fifth row are added around it.

    Args:
        Data: DataFrame to save (its index is written as the first column).
        TargetPath: Path of the workbook to write.
    """
    # The context manager saves and closes the workbook on exit, even if a
    # formatting call raises, so no explicit save()/close() is needed.
    with pd.ExcelWriter(TargetPath, engine='xlsxwriter') as writer:
        # header=False because the header row is written by hand below so
        # that it can carry the shared cell format.
        Data.to_excel(writer, sheet_name='Sheet1', header=False, startcol=0, startrow=2)
        workbook = writer.book

        # Define the cell style once, then reuse it for every decorated cell.
        format1 = workbook.add_format({
            'bold': True,           # Bold font
            'text_wrap': True,      # Wrap long text automatically
            'valign': 'bottom',     # Vertical alignment
            'align': 'center',      # Horizontal alignment
            'fg_color': '#C5D9F1',  # Cell background color
            'border': 1,            # Border
        })
        writer_sheet = writer.sheets['Sheet1']
        # Set the column widths.
        writer_sheet.set_column("A:I", 16)
        writer_sheet.set_column('C:C', 30)
        writer_sheet.merge_range(0, 0, 0, 2, 'Comparison results', format1)
        # Columns are given in ascending order (first_col <= last_col).
        writer_sheet.merge_range(4, 0, 4, 2, 'Difference in data volume', format1)
        writer_sheet.write(1, 0, '', format1)
        writer_sheet.write(1, 1, 'ODS', format1)
        writer_sheet.write(1, 2, 'DWH', format1)
# One-row summary table: the number of rows in each sheet, labelled by sheet.
DataFrame_DataVolume = pd.DataFrame(
    {"ODS": [DATA_ODS.shape[0]], "DWH": [DATA_DWH.shape[0]]},
    index=["Volume of data"],
)
write_to_excel_DataVolume(DataFrame_DataVolume, TargetPath)

(Image omitted - placeholder text: "insert image description here")

Output Form 2 -- Contracts That Account for the Difference in Data Volume

def _keys_only_in(present, absent, key):
    """Return the distinct ``key`` values of ``present`` that never occur in ``absent``.

    The result is a one-column DataFrame in the row order of ``present``.
    """
    missing = ~present[key].isin(absent[key])
    return present.loc[missing, [key]].drop_duplicates()


# Column titles of the key-difference table.
_COUNT_RESULT_COLUMNS = ['Serial number', 'DWH more contracts', "DWH's lesser contract"]

if DATA_ODS.shape[0] == DATA_DWH.shape[0]:
    # Same number of rows: no key differences are reported. The table is
    # still defined (empty) so the statements that use it afterwards do not
    # fail with a NameError.
    DataFrame_DataVolume_Count_result = pd.DataFrame(columns=_COUNT_RESULT_COLUMNS)
else:
    # Naming kept from the original script: df_diff_ODS holds the keys that
    # exist only in DWH ("DWH more"), df_diff_DWH the keys that exist only in
    # ODS ("DWH less").
    df_diff_ODS = _keys_only_in(DATA_DWH, DATA_ODS, Primarykey)
    df_diff_DWH = _keys_only_in(DATA_ODS, DATA_DWH, Primarykey)

    # Only the first 10 differing keys of each side are reported.
    df_diff_ODS_Data = df_diff_ODS[Primarykey].head(10).tolist()
    df_diff_DWH_Data = df_diff_DWH[Primarykey].head(10).tolist()

    # Pad the shorter list with "-" so both columns have the same length.
    row_count = max(len(df_diff_ODS_Data), len(df_diff_DWH_Data))
    df_diff_ODS_Data += ["-"] * (row_count - len(df_diff_ODS_Data))
    df_diff_DWH_Data += ["-"] * (row_count - len(df_diff_DWH_Data))

    DataFrame_DataVolume_Count_result = pd.DataFrame({
        'Serial number': list(range(row_count)),
        'DWH more contracts': df_diff_ODS_Data,
        "DWH's lesser contract": df_diff_DWH_Data,
    })
from openpyxl import load_workbook
 
def write_to_excel_Count_result(Data, TargetPath):
    """Append ``Data`` below the existing content of ``Sheet1`` in ``TargetPath``.

    The existing sheet is read to find how many data rows it already holds,
    then ``Data`` (with its column header, without its index) is written
    starting one row below them. The workbook at ``TargetPath`` must already
    exist.

    Args:
        Data: DataFrame to append.
        TargetPath: Path of the existing workbook to extend.
    """
    sheet_name = 'Sheet1'
    # Number of data rows already on the sheet (the first row is consumed as
    # the header by read_excel, hence the +1 below).
    df_rows = pd.read_excel(TargetPath, sheet_name=sheet_name).shape[0]
    # mode='a' keeps the existing workbook and if_sheet_exists='overlay'
    # writes into the existing sheet instead of replacing it; the context
    # manager saves and closes the file on exit.
    with pd.ExcelWriter(TargetPath, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        Data.to_excel(writer, sheet_name=sheet_name, startrow=df_rows + 1,
                      index=False, startcol=0, header=True)


write_to_excel_Count_result(DataFrame_DataVolume_Count_result, TargetPath)

(Image omitted - placeholder text: "insert image description here")

This concludes the article on implementing a Python-based tool for comparing Excel files. For more material on comparing Excel files with Python, please search my previous posts or browse the related articles below. Thank you for your continued support!