import json
import pandas as pd
import datetime
import time
import numpy as np
import datetime
import time
import calendar
def dateFormatID_BIRTH(row):
    """Return the normalized date string stored under ``row['ID_BIRTH']``.

    ``row`` is anything indexable by column name (the script passes the
    rows handed out by ``DataFrame.apply(..., axis=1)``). The value is
    passed to ``dateFormat`` via the generic column helper.
    """
    return dateFormatWithColumn(row, 'ID_BIRTH')
def dateFormat_XXXX(row):
    """Return the normalized date string stored under ``row['XXXX']``.

    ``row`` is anything indexable by column name. The value is passed to
    ``dateFormat`` via the generic column helper.
    """
    return dateFormatWithColumn(row, 'XXXX')
def dateFormatWithColumn(row, column):
    """Look up ``row[column]`` and return it normalized by ``dateFormat``.

    Args:
        row: Object indexable by ``column`` (e.g. a dict or a row Series).
        column: Key of the field holding the date string.
    """
    return dateFormat(row[column])
def dateFormat(dateString):
    """Normalize a ``YYYY/MM/DD`` date string to zero-padded form.

    The value is parsed with the ``%Y/%m/%d`` format and written back with
    the same format, so ``'2020/1/5'`` becomes ``'2020/01/05'``.

    Args:
        dateString: The date text, or a missing value.

    Returns:
        The normalized date string, or ``''`` when the input is ``None``,
        a pandas missing value (NaN / NA / NaT) or the empty string.

    Raises:
        ValueError: If a non-empty string does not match ``%Y/%m/%d``.
    """
    # Check for missing values first: comparing pd.NA with == does not
    # yield a plain bool, and NaN (a float) would otherwise reach strptime
    # and fail with TypeError.
    if pd.isna(dateString) or dateString == '':
        return ''
    parsed = datetime.datetime.strptime(dateString, "%Y/%m/%d")
    return parsed.strftime("%Y/%m/%d")
def print2(msg):
    """Print ``msg`` on one line, prefixed with the current time and ' => '."""
    timestamp = datetime.datetime.now()
    print(timestamp, msg, sep=' => ')
def print3(msg):
    """Print the current time, then ``msg``, then an empty line."""
    timestamp = datetime.datetime.now()
    # Three items joined by newlines: timestamp, message, and an empty
    # string that produces the trailing blank line.
    print(timestamp, msg, '', sep='\n')
def printType(obj):
    """Print the type of ``obj`` (debugging aid)."""
    obj_type = type(obj)
    print(obj_type)
# ---------------------------------------------------------------------------
# Script: load two CSV files, keep only the columns they have in common,
# normalize the date columns, and write each side back out so the two
# results can be compared.
# ---------------------------------------------------------------------------

# Input / output locations.
fileA = r'C:\Users\user\Anaconda-Python-Jupyter\Python_diff\filea.csv'
fileB = r'C:\Users\user\Anaconda-Python-Jupyter\Python_diff\fileb.csv'
outA = 'outA.csv'
outB = 'outB.csv'

# Column used as the sort key for both inputs.
COLUM_NAME_ID_BIRTH = 'ID_BIRTH'

# Not used in this part of the script; kept because later code may rely
# on them.
ROW_COUNT = 0
errors = []
equal_record = []

# Read every column as text, and turn missing cells into '' so that the
# date helpers and the CSV output see plain strings.
# fillna('') replaces the former ``replace({pd.np.nan: ''})``: the
# ``pd.np`` alias no longer exists in pandas 2.x.
print2('read_csv file A 離線資料')
df_a = pd.read_csv(fileA, index_col=False, dtype=str)
df_a = df_a.fillna('')
# NOTE(review): rows are sorted on the raw ID_BIRTH text, before the dates
# are normalized below - confirm that this ordering is the intended one.
df_as = df_a.sort_values(by=[COLUM_NAME_ID_BIRTH]).copy()

print2('read_csv file B 匯出資料')
df_b = pd.read_csv(fileB, index_col=False, dtype=str)
df_b = df_b.fillna('')
df_bs = df_b.sort_values(by=[COLUM_NAME_ID_BIRTH]).copy()
print2('read_csv stop')

# Columns present in both files, in alphabetical order.
columns_of_a = list(df_as.columns)
columns_of_b = list(df_bs.columns)
columns_of_a_and_b = sorted(set(columns_of_a) & set(columns_of_b))
print('columns intersection')

# Restrict both frames to the shared columns (same column order on both
# sides); row order stays the sorted order established above.
print('re assign data')
outputA = df_as[columns_of_a_and_b].copy()
outputB = df_bs[columns_of_a_and_b].copy()

# Normalize the date columns value by value.
print('date formatting')
outputA[COLUM_NAME_ID_BIRTH] = outputA[COLUM_NAME_ID_BIRTH].map(dateFormat)
outputA["XXXX"] = outputA["XXXX"].map(dateFormat)
print('date formatting2')
outputB[COLUM_NAME_ID_BIRTH] = outputB[COLUM_NAME_ID_BIRTH].map(dateFormat)
outputB["XXXX"] = outputB["XXXX"].map(dateFormat)

# Write the results; the log lines report the paths actually written.
print2('to file ' + outA)
outputA.to_csv(outA, index=False)
print2('to file ' + outB)
outputB.to_csv(outB, index=False)
print2('End')