import pandas as pd
# Sample records, keyed by column name: one identifier column, one age column.
data = {
    'Unique ID': [
        'CCC-08559',
        'CCC-0856A',
        'CCC09512',
        '08565AAA',
        'CCC-08565',
    ],
    'Age': [20, 21, 19, 18, 15],
}
# Each dict key becomes a column (the default orientation of from_dict).
df = pd.DataFrame.from_dict(data)
# Bare expression: shows the frame when run in a notebook or REPL.
df
我想借助正则表达式(regex),在数据帧中字母与数字相邻的位置插入连字符,并打印所有发生更改的行的索引。例如:索引 2 处的 CCC09512 应变为 CCC-09512。
如果我理解正确(IIUC),您可以使用带有环视(lookaround)断言的正则表达式:
# Insert a hyphen at every letter/digit boundary (LetterDigit -> Letter-Digit,
# DigitLetter -> Digit-Letter). The pattern is two zero-width alternatives
# built from lookarounds, so no character is consumed:
#   (?<=[a-zA-Z])(?=\d)   position between a letter and a following digit
#   (?<=\d)(?=[a-zA-Z])   position between a digit and a following letter
# NOTE: `\d` needs its backslash; a bare `d` would match the letter "d" only.
s = df['Unique ID'].str.replace(
    r'(?<=[a-zA-Z])(?=\d)|(?<=\d)(?=[a-zA-Z])', '-', regex=True
)
# Flag the rows whose ID differs from the original value.
df['changed'] = df['Unique ID'].ne(s)
# Overwrite the column with the hyphenated IDs.
df['Unique ID'] = s
输出:
Unique ID Age changed
0 CCC-08559 20 False
1 CCC-0856-A 21 True
2 CCC-09512 19 True
3 08565-AAA 18 True
4 CCC-08565 15 False
限制为第一次出现:
# Hyphenate only the FIRST letter/digit boundary of each ID.
# Compared with a purely zero-width pattern, each alternative also accepts an
# optional existing hyphen (`-?`), so an ID that is already hyphenated at its
# first boundary (e.g. "CCC-0856A") uses up the single allowed replacement
# (n=1) on that hyphen and is left unchanged.
# NOTE: `\d` needs its backslash; a bare `d` would match the letter "d" only.
s = df['Unique ID'].str.replace(
    r'(?<=[a-zA-Z])-?(?=\d)|(?<=\d)-?(?=[a-zA-Z])', '-', n=1, regex=True
)
# Flag the rows whose ID differs from the original value.
df['changed'] = df['Unique ID'].ne(s)
# Overwrite the column with the hyphenated IDs.
df['Unique ID'] = s
输出:
Unique ID Age changed
0 CCC-08559 20 False
1 CCC-0856A 21 False
2 CCC-09512 19 True
3 08565-AAA 18 True
4 CCC-08565 15 False