给定以下示例，我该如何创建计算列 "parent_node"？
import pandas as pd
# Example frame with a single "node" column; each value is a
# space-separated sequence of tokens.
df = pd.DataFrame(
    {
        "node": [
            "N07 S40 G S06 S29 G N13",
            "N07",
            "N07 S28",
            "N07 S28 G N06 S16",
            "N08 N05",
            "N07 S28 G N05",
            "N08 N05 G N27",
            "N07 S28 G N05 N03",
            "N07 S28 G N05 N03 G S31",
            "N07 S28 G N06 S16 G S32",
        ],
    }
)
#create column called count_of_spaces_in_node
def countSpaces(cell):
    """Return the number of space characters in ``cell``.

    Args:
        cell: The value to inspect, normally a string.

    Returns:
        ``cell.count(" ")`` when that call succeeds, otherwise 0. Values
        without a usable ``count`` method (for example ``None`` or a float
        NaN) fall into the "otherwise" case.
    """
    try:
        return cell.count(" ")
    # Catch only the failures a non-string value produces. A bare ``except``
    # would also swallow KeyboardInterrupt/SystemExit and hide real bugs.
    except (AttributeError, TypeError):
        return 0
df["count_of_spaces_in_node"] = df["node"].apply(countSpaces)

# Order rows by space count, breaking ties on the node string, then replace
# the old index with a fresh 0..n-1 index that follows the new order.
df = (
    df.sort_values(by=["count_of_spaces_in_node", "node"])
    .reset_index(drop=True)
)

# Character length of each node string.
df["length_of_node"] = df["node"].str.len()
生成的df如下所示:
node | count_of_spaces_in_node | length_of_node |
---|---|---|
N07 | 0 | 3 |
N07 S28 | 1 | 7 |
N08 N05 | 1 | 7 |
N07 S28 G N05 | 3 | 13 |
N08 N05 G N27 | 3 | 13 |
N07 S28 G N05 N03 | 4 | 17 |
N07 S28 G N06 S16 | 4 | 17 |
N07 S28 G N05 N03 G S31 | 6 | 23 |
N07 S28 G N06 S16 G S32 | 6 | 23 |
N07 S40 G S06 S29 G N13 | 6 | 23 |
假设节点 N08 N05 G N27 的父节点为 N08 N05（即列中已存在的最长前缀节点），则可以用下面的片段生成该列。
试试下面的片段,
import numpy as np  # find_par_node returns np.nan when no parent exists

df = pd.DataFrame({
    "node": [
        "N07 S40 G S06 S29 G N13", "N07", "N07 S28", "N07 S28 G N06 S16",
        "N08 N05", "N07 S28 G N05", "N08 N05 G N27", "N07 S28 G N05 N03",
        "N07 S28 G N05 N03 G S31", "N07 S28 G N06 S16 G S32",
    ]
})

# Token lists of every node, one list per row; non-string values are skipped.
node_list = [node.split() for node in df["node"] if isinstance(node, str)]

# The same tokenised nodes as hashable tuples in a set, so that each prefix
# test in find_par_node is a hash lookup instead of a scan over node_list.
_known_nodes = {tuple(tokens) for tokens in node_list}


def find_par_node(x):
    """Return the closest existing ancestor of node ``x``.

    ``x`` is split on whitespace and its proper prefixes are tried from the
    longest to the shortest; the first prefix that is itself a node in
    ``df["node"]`` is returned.

    Args:
        x: A node value, i.e. a whitespace-separated string of tokens.

    Returns:
        The matching prefix joined with single spaces, or ``np.nan`` when no
        proper prefix of ``x`` is a known node or ``x`` is not a string.
    """
    if not isinstance(x, str):
        return np.nan
    # Tokenise exactly as node_list does, so both sides compare equal.
    tokens = tuple(x.split())
    # Longest proper prefix first, so the nearest ancestor wins.
    for end in range(len(tokens) - 1, 0, -1):
        prefix = tokens[:end]
        if prefix in _known_nodes:
            return " ".join(prefix)
    return np.nan


df["parent_node"] = df["node"].apply(find_par_node)
print(df)
node parent_node
N07 S40 G S06 S29 G N13 N07
N07 NaN
N07 S28 N07
N07 S28 G N06 S16 N07 S28
N08 N05 NaN
N07 S28 G N05 N07 S28
N08 N05 G N27 N08 N05
N07 S28 G N05 N03 N07 S28 G N05
N07 S28 G N05 N03 G S31 N07 S28 G N05 N03
N07 S28 G N06 S16 G S32 N07 S28 G N06 S16