R语言 - 将组宽度不一致的数据框转换为组宽度相等的数据框



我的目标是转换像这样的东西:

# Example input: seven groups of unequal width, each carrying a value of 100.
# startgroup/endgroup are the inclusive bounds of every group.
df1 <- data.frame(
  value1     = rep(100, 7),
  #  value2 = c(a, b, c, d, e, f, g),
  startgroup = c(1, 101, 351, 356, 401, 451, 451),
  endgroup   = c(100, 350, 355, 400, 450, 450, 500),
  groupwidth = c(100, 250, 5, 40, 50, 0, 50)
)

变成这样:

# Desired output: the same values redistributed over consecutive groups that
# are all exactly 100 wide.
df2 <- data.frame(
  value1     = c(100, 40, 40, 220, 300),
  #  value2 = c(a, b*.4, b*.4, b*.2+c+d, d+e+f),
  startgroup = c(1, 101, 201, 301, 401),
  endgroup   = c(100, 200, 300, 400, 500),
  groupwidth = rep(100, 5)
)

我已经用for循环实现了这一点,但不知为何,每个变量要运行大约5-10分钟。我不明白为什么这件事这么难。我相信一定有更简单的方法可以做到。

如果有人感兴趣:我找到了一个可行的解决方案,在实际数据上运行不到5秒。最终的解决方案同样使用了for循环。

# Re-bin groups of unequal width into consecutive, equally wide bins.
# Every group's value1 is spread uniformly over its own width and then summed
# per bin. The result overwrites df1 with one row per bin:
#   upper_bound - inclusive upper edge of the bin (integer)
#   value       - total value falling into that bin
library(tidyverse) # provides %>%, mutate(), pivot_longer(), summarize()

bin_width <- 100

df1 <- data.frame(
  value1 = c(100, 100, 100, 100, 100, 100, 100),
  #  value2 = c(a, b, c, d, e, f, g),
  startgroup = c(1, 101, 351, 356, 401, 451, 451),
  endgroup = c(100, 350, 355, 400, 450, 450, 500),
  groupwidth = c(100, 250, 5, 40, 50, 0, 50)
) %>%
  mutate(
    # Shift the start down by one so each group covers the half-open interval
    # (startgroup, endgroup].
    startgroup = startgroup - 1,
    # Width is taken from the bounds, not from the groupwidth column.
    width = endgroup - startgroup
  )

# Number of bins needed to cover the largest endgroup. ceiling() avoids the
# extra, always-empty bin that `%/% bin_width + 1` creates whenever the
# maximum is an exact multiple of bin_width.
n_bins <- ceiling(max(df1$endgroup, na.rm = TRUE) / bin_width)

for (i in seq_len(n_bins)) {
  bin_lower <- (i - 1) * bin_width
  bin_upper <- i * bin_width

  df1 <- df1 %>%
    mutate(
      # Length of the intersection of (startgroup, endgroup] with
      # (bin_lower, bin_upper]; zero when the two do not overlap.
      overlap = pmax(0, pmin(endgroup, bin_upper) - pmax(startgroup, bin_lower)),
      # One column per bin, named after the bin's upper bound.
      !!paste0(bin_upper) := ifelse(
        width > 0,
        # Share of value1 proportional to the overlapping width.
        value1 * (overlap / width),
        # A zero-width group cannot be spread out: it contributes its whole
        # value to the single bin that contains its endgroup.
        ifelse(endgroup > bin_lower & endgroup <= bin_upper, value1, 0)
      )
    )
}

df1 <- df1 %>%
  pivot_longer(
    # The per-bin columns are the only ones with purely numeric names.
    cols = matches("^[0-9]+$"),
    names_to = "upper_bound",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  # Convert before grouping so the bins sort numerically, not as text.
  mutate(upper_bound = as.integer(upper_bound)) %>%
  group_by(upper_bound) %>%
  summarize(value = sum(value))
library(tidyverse)

# Alternative without an explicit loop; df1 here is the original, unshifted
# input. Each row's width is cut into 100-wide pieces plus a remainder, and
# the pieces are assigned to output bins by their running total width.
# NOTE(review): the bin index depends only on the cumulative piece widths, so
# this appears to assume the rows are ordered and contiguous starting at 1,
# and that any row wider than 100 starts on a bin boundary -- confirm before
# using it on real data.
df1 %>%
  # Zero-width rows would divide by zero in value1 / groupwidth below. Dropping
  # them also drops their value1, which is why the last bin sums to 200 here
  # rather than the 300 shown in the desired output.
  filter(groupwidth > 0) %>%
  rowwise() %>%
  mutate(
    # Piece widths for this row: as many full 100s as fit, then the remainder
    # (which is 0 when groupwidth is an exact multiple of 100).
    gr = list(c(rep(100, groupwidth %/% 100), groupwidth %% 100))
  ) %>%
  unnest(gr) %>%
  # Subtracting 1 before cumsum keeps a piece that ends exactly on a multiple
  # of 100 in the bin it closes instead of pushing it into the next one.
  group_by(st = cumsum(gr - 1) %/% 100) %>%
  summarise(
    # Each piece carries value1 in proportion to its share of the row's width.
    val = sum(value1 / groupwidth * gr),
    startgroup = st[1] * 100 + 1,
    endgroup = startgroup + 99,
    groupwidth = 100
  )
# A tibble: 5 x 5
st   val startgroup endgroup groupwidth
<dbl> <dbl>      <dbl>    <dbl>      <dbl>
1     0   100          1      100        100
2     1    40        101      200        100
3     2    40        201      300        100
4     3   220        301      400        100
5     4   200        401      500        100

相关内容

  • 没有找到相关文章

最新更新