Purpose: To design an application GUI for comparing two Excel files.
reasoning
1. Parameters
- One Excel file containing two sheets: ODS (old data) and DWH (new data)
- Generate Comparison File
- Two primary keys: enter primary key 1 and primary key 2
(by default, the old and new sheets are assumed to use the same column names)
2. Effects
- Documents generated
- Same amount of data: take the first 10 inconsistent rows for each field
- Different amounts of data: take the first 10 keys that exist on only one side; then, excluding those unmatched rows, take the first 10 inconsistent rows for each field
3. Realization
- Loop over the columns, comparing a combined column (primary key + compared column)
- pandas computes the difference data; openpyxl handles the formatting of the generated sheet. (Generate the data first, then adjust the format.)
configure
import pandas as pd
from openpyxl import load_workbook

# Select file path.
# NOTE(review): `path` is not used below, and its value differs from the
# literal passed to pd.read_excel (different folder and file name) --
# confirm which location is the intended source workbook.
path = r"C:\Users\Small Pipe Students\Desktop\Migration_Data_Compari\ Comparison File.xls"  # input("Select file path:")

# Workbook that receives the comparison results.
TargetPath = r"C:\Users\Small Pipe Students\Desktop\Migration_Data_Comparison_Tool\Target_File\Comparison_Results.xlsx"

# Load the two sheets to compare from the same workbook:
# "ODS" holds the old data, "DWH" holds the new data.
DATA_ODS = pd.read_excel(
    r"C:\Users\Small Pipe Student\Desktop\Migration_Data_Comparison_Tool\Comparison_File.xls",
    sheet_name="ODS",
)
DATA_DWH = pd.read_excel(
    r"C:\Users\Small Pipe Student\Desktop\Migration_Data_Comparison_Tool\Comparison_File.xls",
    sheet_name="DWH",
)

# Select primary key: the column used to match rows between the two sheets.
Primarykey = "Employee number"  # input("Select primary key 1:")
I. Volume of data
Output Form 1 - Data Volume
def write_to_excel_DataVolume(Data, TargetPath):
    """Write the data-volume summary to a new, formatted workbook.

    Creates (or overwrites) the workbook at ``TargetPath`` with a single
    sheet named ``Sheet1``: a merged title row, a header row
    ("", "ODS", "DWH"), the rows of ``Data`` starting at the third row,
    and a merged "Difference in data volume" caption on the fifth row.

    Args:
        Data: DataFrame to save; its index is written as the first column
            and its own header row is suppressed.
        TargetPath: Path of the ``.xlsx`` file to write.
    """
    # The context manager saves and closes the workbook even if a call
    # below raises, so no explicit save()/close() is needed.
    with pd.ExcelWriter(TargetPath, engine='xlsxwriter') as writer:
        # Put the dataframe's data starting at row 2 (0-based); rows 0 and 1
        # are reserved for the title and the hand-written header.
        Data.to_excel(writer, sheet_name='Sheet1', header=False,
                      startcol=0, startrow=2)

        workbook = writer.book
        # Pack the style once, then reuse it for every decorated cell.
        format1 = workbook.add_format({
            'bold': True,           # Bold font
            'text_wrap': True,      # Wrap long text automatically
            'valign': 'bottom',     # Vertical alignment
            'align': 'center',      # Horizontal alignment
            'fg_color': '#C5D9F1',  # Cell background color
            'border': 1,            # Border
        })

        writer_sheet = writer.sheets['Sheet1']
        # Set the column widths; the later call overrides column C.
        writer_sheet.set_column("A:I", 16)
        writer_sheet.set_column('C:C', 30)

        writer_sheet.merge_range(0, 0, 0, 2, 'Comparison results', format1)
        # Column bounds given in ascending order (originally 2 -> 0).
        writer_sheet.merge_range(4, 0, 4, 2, 'Difference in data volume',
                                 format1)
        writer_sheet.write(1, 0, '', format1)
        writer_sheet.write(1, 1, 'ODS', format1)
        writer_sheet.write(1, 2, 'DWH', format1)


# One-row summary: the number of rows in each sheet.
DataFrame_DataVolume = pd.DataFrame(
    {"ODS": [DATA_ODS.shape[0]], "DWH": [DATA_DWH.shape[0]]},
    index=["Volume of data"],
)
write_to_excel_DataVolume(DataFrame_DataVolume, TargetPath)
Output Form 2 -- Contracts Behind the Data Volume Difference
def write_to_excel_Count_result(Data, TargetPath):
    """Append ``Data`` below the existing content of the result workbook.

    Reads the first sheet of the workbook at ``TargetPath`` to count its
    rows, then writes ``Data`` (with header, without index) into the
    default sheet ``Sheet1``, starting one row past that count.

    Args:
        Data: DataFrame to append.
        TargetPath: Path of an existing ``.xlsx`` workbook.
    """
    # Number of rows already present decides where the new table starts.
    df_rows = pd.read_excel(TargetPath).shape[0]

    # mode='a' keeps the existing workbook; 'overlay' writes into the
    # existing sheet instead of replacing or renaming it (pandas >= 1.4).
    with pd.ExcelWriter(TargetPath, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        Data.to_excel(writer, startrow=df_rows + 1, index=False,
                      startcol=0, header=True)


# Only when the two sheets differ in row count: list the primary keys that
# exist on one side only and append them to the result workbook.
if DATA_ODS.shape[0] != DATA_DWH.shape[0]:
    DATA_ODS_Primarykey = pd.DataFrame(DATA_ODS[Primarykey])
    DATA_DWH_Primarykey = pd.DataFrame(DATA_DWH[Primarykey])

    # Keys present in DWH but missing from ODS ("DWH more contracts").
    df_diff_ODS = DATA_DWH_Primarykey[
        ~DATA_DWH_Primarykey[Primarykey].isin(DATA_ODS_Primarykey[Primarykey])
    ].drop_duplicates()
    # Keys present in ODS but missing from DWH ("DWH's lesser contract").
    df_diff_DWH = DATA_ODS_Primarykey[
        ~DATA_ODS_Primarykey[Primarykey].isin(DATA_DWH_Primarykey[Primarykey])
    ].drop_duplicates()

    # Report at most the first 10 keys from each side.
    df_diff_ODS_Data = df_diff_ODS[Primarykey].head(10).tolist()
    df_diff_DWH_Data = df_diff_DWH[Primarykey].head(10).tolist()

    # Pad the shorter list with "-" so both columns have the same length.
    longest = max(len(df_diff_ODS_Data), len(df_diff_DWH_Data))
    df_diff_ODS_Data += ["-"] * (longest - len(df_diff_ODS_Data))
    df_diff_DWH_Data += ["-"] * (longest - len(df_diff_DWH_Data))

    DataFrame_DataVolume_Count_result = pd.DataFrame({
        'DWH more contracts': df_diff_ODS_Data,
        "DWH's lesser contract": df_diff_DWH_Data,
    }).reset_index()
    # The former 0-based index becomes the serial-number column.
    DataFrame_DataVolume_Count_result.columns = [
        'Serial number', 'DWH more contracts', "DWH's lesser contract",
    ]

    write_to_excel_Count_result(DataFrame_DataVolume_Count_result, TargetPath)
This concludes the article on implementing a Python-based Excel comparison widget. For more on comparing Excel files with Python, please search my previous posts or browse the related articles below. I hope you will continue to support me in the future!