R - 使用 apply 类函数从另一个列表的元素中随机抽样



我有一个包含 5 列的 data.table,名为 clients。我关注其中的 product 列,它只能取三个值:Product 1、Product 2、Product 3。

我还有一个包含三个元素的列表 products:Product 1、Product 2、Product 3。每个元素又由 7 个列表等组成,基本上是一个嵌套结构。这里我感兴趣的是位于列表第 4 层的一个向量。

products[["Product 2"]]$CC$ID
[1] "A" "B" "C"

最后,对于 data.table clients 中的每一行,我想根据 clients$product 列的值,从列表 products 中对应产品的元素里随机抽取一个值。我尝试用 apply 来实现:

apply(clients$product, 1, function(x) sample(products$x$CC$ID, 1))
Error in apply(clients$product, 1, function(x) sample(products$x$CC$ID,  : 
dim(X) must have a positive length

apply(clients$product, 1, function(x) sample(products[[clients$product]]$CC$ID, 1))
Error in apply(clients$product, 1, function(x) sample(products[[clients$product]]$CC$ID,  : 
dim(X) must have a positive length

我想我的apply出了什么问题?

如何实现我的目标?

整个练习的目标是创建一个新的列 clients$name。clients$name 的每个值都应从 products$<clients$product 列对应的产品>$CC$ID 中随机抽取。

这就是您想要的吗?

# data.table() comes from the data.table package; load it here so the example
# can be pasted into a fresh session and run on its own.
library(data.table)

# Lookup list keyed by product name. Each product holds a `CC` data.table whose
# `ID` column is the pool of values that can be drawn for that product.
products <- list(
  "Product 1" = list(CC = data.table(no = c(1, 2, 3), ID = c("A", "B", "C"))),
  "Product 2" = list(CC = data.table(no = c(1, 2, 3), ID = c("E", "F", "G"))),
  "Product 3" = list(CC = data.table(no = c(1, 2, 3), ID = c("I", "J", "X")))
)
# Example client table (20 rows), written as a raw structure() literal so it
# can be rebuilt without calling any data.table constructor. One column per
# entry; `gender` and `status` are factors stored as integer codes + levels.
clients <- structure(
  list(
    clientID = c(
      37796L, 20469L, 94233L, 68909L, 78700L, 70727L, 85738L, 3441L, 29577L,
      40235L, 17863L, 14273L, 46739L, 60392L, 80945L, 62326L, 121555L,
      127235L, 39507L, 41697L
    ),
    product = c(
      "Product 1", "Product 2", "Product 2", "Product 2", "Product 1",
      "Product 2", "Product 2", "Product 2", "Product 1", "Product 2",
      "Product 1", "Product 3", "Product 2", "Product 3", "Product 1",
      "Product 1", "Product 2", "Product 3", "Product 1", "Product 2"
    ),
    gender = structure(
      c(
        1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
        2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L
      ),
      levels = c("M", "W"),
      class = "factor"
    ),
    status = structure(
      c(
        1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L,
        1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
      ),
      levels = c("EMP", "NEMP"),
      class = "factor"
    ),
    age = c(
      40L, 48L, 23L, 59L, 43L, 32L, 42L, 46L, 47L, 24L,
      40L, 39L, 40L, 32L, 58L, 45L, 48L, 38L, 51L, 22L
    ),
    year = c(
      2018L, 2019L, 2019L, 2018L, 2017L, 2018L, 2019L, 2019L, 2019L, 2019L,
      2017L, 2017L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2018L, 2017L
    ),
    # Every exposure is 1 except the seventh client.
    exposure = c(rep(1, 6), 0.930235934569214, rep(1, 13))
  ),
  row.names = c(NA, -20L),
  class = c("data.table", "data.frame")
)
# Draw one random ID per client from the `CC$ID` pool of that client's product.
# vapply() pins the result to a character vector (sapply() can silently change
# type, e.g. on empty input), and USE.NAMES = FALSE keeps the product labels
# from being attached as element names.
random_product_id <- vapply(
  clients$product,
  function(x) {
    ids <- products[[x]]$CC$ID
    # Pick by position: sample(ids, 1) would treat a length-one numeric `ids`
    # as the range 1:ids instead of as a single candidate.
    ids[sample.int(length(ids), 1L)]
  },
  character(1),
  USE.NAMES = FALSE
)
# Store the draw in a new `name` column. Writing it back into `product` would
# destroy the key that links each client to its product.
clients$name <- random_product_id

你可以试试这个。

# Fix the RNG state so the sampled IDs are reproducible.
set.seed(42)
# Append an `ID` column holding one randomly chosen ID per row, taken from the
# `CC` table of the row's product.
# - `CC` is looked up by name, so the code keeps working when `CC` is not the
#   first sub-element of a product.
# - sample.int(length(ids), 1L) avoids the 1:nrow() pitfall on an empty table
#   and consumes the random stream exactly like sample(1:n, 1).
# - vapply(..., USE.NAMES = FALSE) returns a plain, unnamed character vector.
clients <- as.data.table(transform(clients, ID = vapply(
  clients$product,
  function(x) {
    ids <- products[[x]][["CC"]][["ID"]]
    ids[sample.int(length(ids), 1L)]
  },
  character(1),
  USE.NAMES = FALSE
)))
#    clientID   product gender status age year exposure ID
# 1:    37796 Product 1      M    EMP  40 2018        1  A
# 2:    20469 Product 2      M    EMP  58 2019        1  E
# 3:    94233 Product 2      M    EMP  45 2019        1  E
# 4:    68909 Product 1      W    EMP  48 2018        1  A
# 5:    20469 Product 3      M    EMP  58 2019        1  J
# 6:    94233 Product 3      M    EMP  45 2019        1  J
# 7:    68909 Product 1      W    EMP  48 2018        1  B

数据:

# Sample client table (7 rows) as a plain data.frame with character row names.
clients <- structure(
  list(
    clientID = c(37796L, 20469L, 94233L, 68909L, 20469L, 94233L, 68909L),
    product = c(
      "Product 1", "Product 2", "Product 2", "Product 1",
      "Product 3", "Product 3", "Product 1"
    ),
    gender = c("M", "M", "M", "W", "M", "M", "W"),
    # All seven clients share the same status and exposure.
    status = rep("EMP", 7L),
    age = c(40L, 58L, 45L, 48L, 58L, 45L, 48L),
    year = c(2018L, 2019L, 2019L, 2018L, 2019L, 2019L, 2018L),
    exposure = rep(1L, 7L)
  ),
  class = "data.frame",
  row.names = as.character(1:7)
)
# Lookup list keyed by product name; each product holds a `CC` table whose `ID`
# column is the pool of values to sample from.
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is not valid R syntax and has been removed so this can be sourced.
# NOTE(review): if these tables will later be modified by reference, calling
# data.table::setDT() on each one should restore the internal pointer - confirm.
products <- list(
  `Product 1` = list(CC = structure(
    list(no = c(1, 2, 3), ID = c("A", "B", "C")),
    row.names = c(NA, -3L),
    class = c("data.table", "data.frame")
  )),
  `Product 2` = list(CC = structure(
    list(no = c(1, 2, 3), ID = c("E", "F", "G")),
    row.names = c(NA, -3L),
    class = c("data.table", "data.frame")
  )),
  `Product 3` = list(CC = structure(
    list(no = c(1, 2, 3), ID = c("I", "J", "X")),
    row.names = c(NA, -3L),
    class = c("data.table", "data.frame")
  ))
)

最新更新