我的目标是转换像这样的东西:
df1 <- data.frame(
value1 = c(100, 100, 100, 100, 100, 100, 100),
# value2=c(a, b, c, d, e, f, g),
startgroup = c(1, 101, 351, 356, 401, 451, 451),
endgroup = c(100, 350, 355, 400, 450, 450, 500),
groupwidth = c(100, 250, 5, 40, 50, 0 , 50)
)
变成这样:
df2 <- data.frame(
value1 = c(100, 40, 40, 220, 300),
# value2=c(a, b*.4, b*.4, b*.2+c+d, d+e+f),
startgroup = c(1, 101, 201, 301, 401),
endgroup = c(100, 200, 300, 400, 500),
groupwidth = c(100, 100, 100, 100, 100)
)
我已经设法用for循环做到了,但不知何故,每个变量需要大约5-10分钟。不知道为什么这么难。我确信,有一个简单的方法可以做到这一点。
如果有人想知道,我找到了一个有效的解决方案,在不到5秒的时间内(在实际数据上)工作。最终的解决方案也是一个for循环。
df1 <- data.frame(
value1 = c(100, 100, 100, 100, 100, 100, 100),
# value2=c(a, b, c, d, e, f, g),
startgroup = c(1, 101, 351, 356, 401, 451, 451),
endgroup = c(100, 350, 355, 400, 450, 450, 500),
groupwidth = c(100, 250, 5, 40, 50, 0 , 50)) %>%
mutate(startgroup = startgroup - 1)
rm <- max(df1$endgroup)%/%100+1
for (i in 1:rm){
df1 <- df1 %>%
mutate(
a = 0,
a = ifelse(startgroup <= 100*i-100 & endgroup > 100*i,
a + value1/(endgroup-startgroup)*100,
a),
a = ifelse(startgroup <= 100*i-100 & endgroup <= 100*i & endgroup > 100*i-100,
a + value1/(endgroup-startgroup)*(endgroup - (i-1)*100),
a),
a = ifelse(startgroup > 100*i-100 & endgroup <= 100*i,
a + value1,
a),
a = ifelse(startgroup > 100*i-100 & startgroup <= 100*i & endgroup > 100*i,
a + value1/(endgroup-startgroup)*(i*100 - startgroup),
a),
!!paste0(i*100) := a
)
}
df1 <- df1 %>%
pivot_longer(
cols = contains("00"),
names_to = "upper_bound",
values_to = "value",
values_drop_na = TRUE) %>%
group_by(upper_bound) %>%
mutate(upper_bound = as.integer(upper_bound)) %>%
summarize(
value = sum(value)
)
library(tidyverse)
df1 %>%
filter(groupwidth>0) %>%
rowwise() %>%
mutate(gr = list(c(rep(100, groupwidth %/%100), groupwidth %%100)),
ln = length(gr)) %>%
unnest(gr) %>%
group_by(st=cumsum(gr-1) %/%100) %>%
summarise(val = sum(value1/groupwidth * gr),
startgroup = st[1] * 100 + 1,
endgroup = startgroup + 99,
groupwidth = 100)
# A tibble: 5 x 5
st val startgroup endgroup groupwidth
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0 100 1 100 100
2 1 40 101 200 100
3 2 40 201 300 100
4 3 220 301 400 100
5 4 200 401 500 100