为什么Merge()似乎根本不做内部合并,而只是像以前一样抓取相同的数据?



当我调用合并两个DataFrame在一起时,我一直得到与我试图进行内部合并的DataFrame相同的输出。我让它为另一个函数工作,它做同样的事情,但使用不同的两个dataframe集。(我从事健康数据工作,所以我试图自动编辑和合并这些文件,以使我的新用户(ef_in)与系统中当前(ul_in)中的所有用户以及位于ul_in但不在ef_in中的用户禁用它们)。

def client_merge(ef_in, ul_in):
# ef_in = pd. read_csv("COC_Ready_EF.csv", sep=',')
# ul_in = pd.read_csv("Ready UserList.csv", sep=',')
pd.set_option('mode.chained_assignment', None)
ef_in['UniqueID'] = ef_in['UniqueID'].astype(object)
ef_in['ZipCode'] = ef_in['ZipCode'].astype(object)
ef_in['HireDate'] = ef_in['HireDate'].astype(object)
ef_in['DateOfBirth'] = ef_in['DateOfBirth'].astype(object)
ul_in['UniqueID'] = ul_in['UniqueID'].astype(object)
ul_in['Action'] = ul_in['Action'].astype(object)
ul_in['ZipCode'] = ul_in['ZipCode'].astype(object)
df = pd.concat(([ef_in, ul_in]), axis=0, ignore_index=True, sort=False)
df.drop_duplicates(subset=['UniqueID'], keep=False, inplace=True)
new_users = df.merge(ef_in)
disable_users = df.merge(ul_in)
disable_users['Action'].fillna('Disable', inplace=True)
ready_to_print_file = pd.concat([new_users, disable_users], ignore_index=False)
rtpf1 = ready_to_print_file[ready_to_print_file["FirstName"].str.contains("Admin") == False]
rtpf2 = rtpf1[rtpf1["FirstName"].str.contains("Clarks") == False]
rtpf3 = rtpf2[rtpf2["FirstName"].str.contains("Test") == False]
rtpf3.to_csv(path, header=True, index=False)

我得到ul_in从这个:

def client_ul_formatter(in_file):
pd.set_option('mode.chained_assignment', None)
in_file = pd.read_csv(in_file, sep=',')
df = in_file[['FirstName', 'LastName', 'Region', 'UniqueID', 'DateOfBirth', 'Gender',
'ZipCode', 'Email', 'Role', 'HireDate', 'Company', 'Action']]
return df

和我得到我的ef_in从:


def rotate_date(strg, n):
return strg[n:] + strg[:n]

def client_ef_formatter(input_file):
pd.set_option('mode.chained_assignment', None)
input_file = pd.read_csv(input_file, sep=',',
dtype={'HIREDATE': str, 'DATE OF BIRTH': str})
df = input_file[['LAST NAME', 'FIRST AND MIDDLE', 'DATE OF BIRTH', 'GENDER', 'RELATIONSHIP',
'HIREDATE', 'ZIP', 'ALT ID', 'EMAIL ADDRESS']]
df = df.drop(df[df['RELATIONSHIP'] != 'E'].index)
df = df.drop(['RELATIONSHIP'], axis=1)
p = -1
hdf = []
for _ in (df['HIREDATE']):
p = (p + len(df['HIREDATE']) - (len(df['HIREDATE']) - 1))
hd = df['HIREDATE'].iloc[p]
f = rotate_date(hd, -4)
hdf.append(f)
q = -1
ddf = []
for _ in (df['DATE OF BIRTH']):
q = (q + len(df['DATE OF BIRTH']) - (len(df['DATE OF BIRTH']) - 1))
dob = df['DATE OF BIRTH'].iloc[q]
f = rotate_date(dob, -4)
ddf.append(f)
df['HIREDATE'] = hdf
df['DATE OF BIRTH'] = ddf
df['DATE OF BIRTH'] = pd.to_datetime(df['DATE OF BIRTH'], errors='coerce', format='%m%d%Y')
df['HIREDATE'] = pd.to_datetime(df['HIREDATE'], errors='coerce', format='%m%d%Y')
df.rename(columns={'HIREDATE': 'HireDate', 'LAST NAME': 'LastName', 'FIRST AND MIDDLE': 'FirstName',
'DATE OF BIRTH': 'DateOfBirth', 'ALT ID': 'UniqueID', 'GENDER': 'Gender',
'ZIP': 'ZipCode', 'EMAIL ADDRESS': 'Email'}, inplace=True)
df['Region'] = pd.Series(dtype=str)
df['Role'] = pd.Series(dtype=str)
df['Company'] = pd.Series(dtype=str)
df['Action'] = pd.Series(dtype=str)
df = df.reindex(columns=['FirstName', 'LastName', 'Region', 'UniqueID', 'DateOfBirth', 'Gender', 'ZipCode', 'Email',
'Role', 'HireDate', 'Company', 'Action'])
df['Company'].fillna('client_account', inplace=True)
df['Role'].fillna('Employee On Plan', inplace=True)
df.to_csv(path, header=True, index=False)

这让我停了下来。我很不确定,因为它没有抛出错误。

必须将传入的两个dataframe的'UniqueID'转换为浮点数,而不是类型(Object)。

相关内容

最新更新