我有一个看起来像这样的列:
SNO  Label
1    #Name-account-dynamic-table-301889874517022
2    #Name-account-dynamic-table-301892680435489
3    #Name-account-dynamic-table-301898303041520
4    #Name-account-dynamic-table-301899707194367
5    #PartyName-_oj131-300010926780895
6    #PartyName-_oj145-100049488429695
7    #PartyName-_oj148-100046909736911
8    #PartyName-_oj156-100039245551420
9    #SalesCreditImage_100005748173584
10   #SalesCreditImage_1676885837179
11   #SalesCreditImage_1676894490983
12   #SalesCreditImage_1677180209183
13   #SalesCreditImage_1677221857743
14   #SalesCreditImage_300007572814764
15   #SalesCreditImage_300010430355109
16   #_oj1046_headerContainer
17   #_oj1053_headerContainer
18   #_oj110_h
一种方法是使用 gsub 修改字符串。
由于第 16–18 行的需求与其他行相反,解决方案是将处理过程分成两步。
# Derive `category` from `Label` in two passes, because the labels in rows
# 16-18 keep the text that follows the digits while every other label keeps
# the text that precedes them.
#
# Pass 1: drop underscores and everything up to (and including) the last run
# of digits. Labels that end in digits become "", the others keep their
# trailing text (e.g. "headerContainer", "h").
# Note: backslashes must be doubled inside an R string literal ("\\d").
df$category <- gsub("_|.*\\d+", "", df$Label)
# Pass 2: drop "#", underscores, everything from the first "-" onward and
# everything from the first digit onward, then prepend that leading text to
# the result of pass 1.
df$category <- paste0(gsub("#|_|-.*|\\d+.*", "", df$Label), df$category)
df
SNO Label category
1 1 #Name-account-dynamic-table-301889874517022 Name
2 2 #Name-account-dynamic-table-301892680435489 Name
3 3 #Name-account-dynamic-table-301898303041520 Name
4 4 #Name-account-dynamic-table-301899707194367 Name
5 5 #PartyName-_oj131-300010926780895 PartyName
6 6 #PartyName-_oj145-100049488429695 PartyName
7 7 #PartyName-_oj148-100046909736911 PartyName
8 8 #PartyName-_oj156-100039245551420 PartyName
9 9 #SalesCreditImage_100005748173584 SalesCreditImage
10 10 #SalesCreditImage_1676885837179 SalesCreditImage
11 11 #SalesCreditImage_1676894490983 SalesCreditImage
12 12 #SalesCreditImage_1677180209183 SalesCreditImage
13 13 #SalesCreditImage_1677221857743 SalesCreditImage
14 14 #SalesCreditImage_300007572814764 SalesCreditImage
15 15 #SalesCreditImage_300010430355109 SalesCreditImage
16 16 #_oj1046_headerContainer ojheaderContainer
17 17 #_oj1053_headerContainer ojheaderContainer
18 18 #_oj110_h ojh
# Sample data: 18 rows with an integer serial number (SNO) and the raw
# label string (Label) from which a category is to be derived.
df <- data.frame(
  SNO = seq_len(18L),
  Label = c(
    "#Name-account-dynamic-table-301889874517022",
    "#Name-account-dynamic-table-301892680435489",
    "#Name-account-dynamic-table-301898303041520",
    "#Name-account-dynamic-table-301899707194367",
    "#PartyName-_oj131-300010926780895",
    "#PartyName-_oj145-100049488429695",
    "#PartyName-_oj148-100046909736911",
    "#PartyName-_oj156-100039245551420",
    "#SalesCreditImage_100005748173584",
    "#SalesCreditImage_1676885837179",
    "#SalesCreditImage_1676894490983",
    "#SalesCreditImage_1677180209183",
    "#SalesCreditImage_1677221857743",
    "#SalesCreditImage_300007572814764",
    "#SalesCreditImage_300010430355109",
    "#_oj1046_headerContainer",
    "#_oj1053_headerContainer",
    "#_oj110_h"
  ),
  stringsAsFactors = FALSE
)
# Reproducible input: a plain data.frame whose SNO column numbers the rows
# and whose Label column holds the raw label strings.
df <- local({
  label <- c(
    "#Name-account-dynamic-table-301889874517022",
    "#Name-account-dynamic-table-301892680435489",
    "#Name-account-dynamic-table-301898303041520",
    "#Name-account-dynamic-table-301899707194367",
    "#PartyName-_oj131-300010926780895",
    "#PartyName-_oj145-100049488429695",
    "#PartyName-_oj148-100046909736911",
    "#PartyName-_oj156-100039245551420",
    "#SalesCreditImage_100005748173584",
    "#SalesCreditImage_1676885837179",
    "#SalesCreditImage_1676894490983",
    "#SalesCreditImage_1677180209183",
    "#SalesCreditImage_1677221857743",
    "#SalesCreditImage_300007572814764",
    "#SalesCreditImage_300010430355109",
    "#_oj1046_headerContainer",
    "#_oj1053_headerContainer",
    "#_oj110_h"
  )
  data.frame(SNO = seq_along(label), Label = label, stringsAsFactors = FALSE)
})
我不确定将最后一行与它之前的两行区分开的确切规则是什么,因此您可以对类别进行硬编码,
或者根据 Label 在删除数字、下划线等字符之后剩余的部分分配连续的 ID:
library(tidyverse)
# Assign each row a fixed category number purely from its position in the
# table; rows not covered by any range below get NA.
df |>
  mutate(
    category = case_when(
      row_number() %in% 1:4 ~ 1,
      row_number() %in% 5:8 ~ 2,
      row_number() %in% 9:15 ~ 3,
      row_number() %in% 16:17 ~ 4,
      row_number() %in% 18 ~ 5
    )
  )
#> # A tibble: 18 × 3
#> SNO Label category
#> <chr> <chr> <dbl>
#> 1 1 #Name-account-dynamic-table-301889874517022 1
#> 2 2 #Name-account-dynamic-table-301892680435489 1
#> 3 3 #Name-account-dynamic-table-301898303041520 1
#> 4 4 #Name-account-dynamic-table-301899707194367 1
#> 5 5 #PartyName-_oj131-300010926780895 2
#> 6 6 #PartyName-_oj145-100049488429695 2
#> 7 7 #PartyName-_oj148-100046909736911 2
#> 8 8 #PartyName-_oj156-100039245551420 2
#> 9 9 #SalesCreditImage_100005748173584 3
#> 10 10 #SalesCreditImage_1676885837179 3
#> 11 11 #SalesCreditImage_1676894490983 3
#> 12 12 #SalesCreditImage_1677180209183 3
#> 13 13 #SalesCreditImage_1677221857743 3
#> 14 14 #SalesCreditImage_300007572814764 3
#> 15 15 #SalesCreditImage_300010430355109 3
#> 16 16 #_oj1046_headerContainer 4
#> 17 17 #_oj1053_headerContainer 4
#> 18 18 #_oj110_h 5
或
# Create a continuous ID that increments whenever the textual part of the
# label changes from one row to the next.
# category_label: Label with every "#", "_", "-" and digit removed.
# category_id:    run counter over category_label (same label in adjacent
#                 rows -> same ID).
# Note: backslashes must be doubled inside an R string literal, otherwise
# "\-" and "\d" are rejected as unrecognized escapes.
df |>
  mutate(category_label = str_remove_all(Label, "[#_\\-\\d]"),
         category_id = consecutive_id(category_label))
#> # A tibble: 18 × 4
#> SNO Label category_label category_id
#> <chr> <chr> <chr> <int>
#> 1 1 #Name-account-dynamic-table-301889874517022 Nameaccountdyn… 1
#> 2 2 #Name-account-dynamic-table-301892680435489 Nameaccountdyn… 1
#> 3 3 #Name-account-dynamic-table-301898303041520 Nameaccountdyn… 1
#> 4 4 #Name-account-dynamic-table-301899707194367 Nameaccountdyn… 1
#> 5 5 #PartyName-_oj131-300010926780895 PartyNameoj 2
#> 6 6 #PartyName-_oj145-100049488429695 PartyNameoj 2
#> 7 7 #PartyName-_oj148-100046909736911 PartyNameoj 2
#> 8 8 #PartyName-_oj156-100039245551420 PartyNameoj 2
#> 9 9 #SalesCreditImage_100005748173584 SalesCreditIma… 3
#> 10 10 #SalesCreditImage_1676885837179 SalesCreditIma… 3
#> 11 11 #SalesCreditImage_1676894490983 SalesCreditIma… 3
#> 12 12 #SalesCreditImage_1677180209183 SalesCreditIma… 3
#> 13 13 #SalesCreditImage_1677221857743 SalesCreditIma… 3
#> 14 14 #SalesCreditImage_300007572814764 SalesCreditIma… 3
#> 15 15 #SalesCreditImage_300010430355109 SalesCreditIma… 3
#> 16 16 #_oj1046_headerContainer ojheaderContai… 4
#> 17 17 #_oj1053_headerContainer ojheaderContai… 4
#> 18 18 #_oj110_h ojh 5
由 reprex v2.0.2 创建于 2023-04-11
其中 df 定义如下:
# Input tibble used by the examples: SNO is stored as character and Label
# holds the raw label strings.
df <- tibble(
  SNO = as.character(1:18),
  Label = c(
    "#Name-account-dynamic-table-301889874517022",
    "#Name-account-dynamic-table-301892680435489",
    "#Name-account-dynamic-table-301898303041520",
    "#Name-account-dynamic-table-301899707194367",
    "#PartyName-_oj131-300010926780895",
    "#PartyName-_oj145-100049488429695",
    "#PartyName-_oj148-100046909736911",
    "#PartyName-_oj156-100039245551420",
    "#SalesCreditImage_100005748173584",
    "#SalesCreditImage_1676885837179",
    "#SalesCreditImage_1676894490983",
    "#SalesCreditImage_1677180209183",
    "#SalesCreditImage_1677221857743",
    "#SalesCreditImage_300007572814764",
    "#SalesCreditImage_300010430355109",
    "#_oj1046_headerContainer",
    "#_oj1053_headerContainer",
    "#_oj110_h"
  )
)