我有一个包含5列的 data.table clients
。我对列product
感兴趣,它只能有三个值:Product 1
、Product 2
、Product 3
。
我还有一个名为 products 的列表,它包含三个元素:Product 1
、Product 2
、Product 3
。每个元素都由7个列表等组成,基本上是一个嵌套结构。这里我感兴趣的是一个元素,它是位于列表第4层的向量。
products[["Product 2"]]$CC$ID
[1] "A" "B" "C"
最后,对于我的 data.table clients 中的每一行,我想根据列 clients$product 的值,从列表 products 中与该产品对应的元素里随机抽取一个值。我试着用 apply 来做这件事:
# Fails: apply() requires an object with dim() (a matrix or array), but
# clients$product is a plain vector. In addition, products$x looks up an
# element literally named "x" rather than using the value held in x.
apply(clients$product, 1, function(x) sample(products$x$CC$ID, 1))
Error in apply(clients$product, 1, function(x) sample(products$x$CC$ID, :
dim(X) must have a positive length
和
# Fails: apply() requires an object with dim() (a matrix or array), but
# clients$product is a plain vector. The function body also indexes products
# with the whole clients$product column instead of the single value x.
apply(clients$product, 1, function(x) sample(products[[clients$product]]$CC$ID, 1))
Error in apply(clients$product, 1, function(x) sample(products[[clients$product]]$CC$ID, :
dim(X) must have a positive length
我的 apply 用法出了什么问题?
如何实现我的目标?
整个练习的目标是创建一个新的列 clients$name。clients$name 的每个值都将从列表 products$<由列 clients$product 决定>$CC$ID 中随机抽取。
这就是您想要的吗?
# Example lookup structure: one entry per product, each holding a `CC`
# data.table whose `ID` column lists the values that may be drawn for it.
products <- lapply(
  list(
    "Product 1" = c("A", "B", "C"),
    "Product 2" = c("E", "F", "G"),
    "Product 3" = c("I", "J", "X")
  ),
  # Wrap each ID set in the nested list(CC = <table>) shape.
  function(ids) list(CC = data.table(no = c(1, 2, 3), ID = ids))
)
# Example `clients` table (20 rows, 7 columns), written out dput()-style as a
# list of columns tagged with the data.table / data.frame classes.
clients <- structure(
  list(
    clientID = c(
      37796L, 20469L, 94233L, 68909L, 78700L, 70727L, 85738L, 3441L, 29577L,
      40235L, 17863L, 14273L, 46739L, 60392L, 80945L, 62326L, 121555L,
      127235L, 39507L, 41697L
    ),
    # Character column; each entry is "Product <k>".
    product = paste(
      "Product",
      c(
        1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
        1L, 3L, 2L, 3L, 1L, 1L, 2L, 3L, 1L, 2L
      )
    ),
    # Factors are given as integer codes mapped onto their labels.
    gender = factor(
      c(
        1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
        2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L
      ),
      levels = 1:2,
      labels = c("M", "W")
    ),
    status = factor(
      c(
        1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L,
        1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
      ),
      levels = 1:2,
      labels = c("EMP", "NEMP")
    ),
    age = c(
      40L, 48L, 23L, 59L, 43L, 32L, 42L, 46L, 47L, 24L,
      40L, 39L, 40L, 32L, 58L, 45L, 48L, 38L, 51L, 22L
    ),
    year = c(
      2018L, 2019L, 2019L, 2018L, 2017L, 2018L, 2019L, 2019L, 2019L, 2019L,
      2017L, 2017L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2018L, 2017L
    ),
    # Every exposure is 1 except for the seventh client.
    exposure = replace(rep(1, 20L), 7L, 0.930235934569214)
  ),
  row.names = c(NA, -20L),
  class = c("data.table", "data.frame")
)
# Draw, for every client, one random ID from the table that belongs to the
# client's product: products[[<product>]]$CC$ID.
#
# vapply() pins the result to one character value per client. as.character()
# makes sure `products` is indexed by product name even if the column is a
# factor, and USE.NAMES = FALSE keeps the product labels from being attached
# to the result as vector names.
random_product_id <- vapply(
  as.character(clients$product),
  function(x) sample(products[[x]]$CC$ID, 1L),
  character(1L),
  USE.NAMES = FALSE
)
# Store the draw in a new column `name` (the stated goal) instead of
# overwriting `product`, which is still needed as the lookup key.
clients$name <- random_product_id
你可以试试这个。
set.seed(42)  # fix the RNG so the sampled IDs are reproducible
# Append a column `ID` that holds, for every client, one ID drawn at random
# from the `CC` table of that client's product, and return a data.table.
clients <- as.data.table(transform(clients, ID = vapply(
  as.character(clients$product),
  function(x) {
    # Look the table up by name: `CC` is not guaranteed to be the first
    # sub-element of a product entry.
    ids <- products[[x]][["CC"]][["ID"]]
    # sample.int() on the length avoids the 1:nrow() pitfall for empty tables
    # and consumes the RNG exactly like sample(1:nrow(s), 1) did.
    ids[sample.int(length(ids), 1L)]
  },
  character(1L),
  USE.NAMES = FALSE
)))
# clientID product gender status age year exposure ID
# 1: 37796 Product 1 M EMP 40 2018 1 A
# 2: 20469 Product 2 M EMP 58 2019 1 E
# 3: 94233 Product 2 M EMP 45 2019 1 E
# 4: 68909 Product 1 W EMP 48 2018 1 A
# 5: 20469 Product 3 M EMP 58 2019 1 J
# 6: 94233 Product 3 M EMP 45 2019 1 J
# 7: 68909 Product 1 W EMP 48 2018 1 B
数据:
# Seven-row example `clients` data frame; all columns are character or
# integer, and the row names are the strings "1" to "7".
clients <- data.frame(
  clientID = c(37796L, 20469L, 94233L, 68909L, 20469L, 94233L, 68909L),
  product = paste("Product", c(1L, 2L, 2L, 1L, 3L, 3L, 1L)),
  gender = c("M", "M", "M", "W", "M", "M", "W"),
  status = rep("EMP", 7L),
  age = c(40L, 58L, 45L, 48L, 58L, 45L, 48L),
  year = c(2018L, 2019L, 2019L, 2018L, 2019L, 2019L, 2018L),
  exposure = rep(1L, 7L),
  row.names = as.character(seq_len(7L)),
  stringsAsFactors = FALSE
)
# Nested lookup list rebuilt from dput() output: one entry per product, each
# holding a `CC` table with the columns `no` and `ID`.
#
# dput() prints a data.table's `.internal.selfref` attribute as
# `<pointer: ...>`, which is not parseable R, so that attribute is left out.
# NOTE(review): data.table is expected to re-create the self-reference on
# first use; if it warns, run data.table::setDT() on each `CC` table - confirm.
products <- list(
  `Product 1` = list(CC = structure(
    list(no = c(1, 2, 3), ID = c("A", "B", "C")),
    row.names = c(NA, -3L),
    class = c("data.table", "data.frame")
  )),
  `Product 2` = list(CC = structure(
    list(no = c(1, 2, 3), ID = c("E", "F", "G")),
    row.names = c(NA, -3L),
    class = c("data.table", "data.frame")
  )),
  `Product 3` = list(CC = structure(
    list(no = c(1, 2, 3), ID = c("I", "J", "X")),
    row.names = c(NA, -3L),
    class = c("data.table", "data.frame")
  ))
)