我有一个数据,我想通过取差值来比较二乘二的测量结果,如果差值大于0.2,则添加另一列为"是",否则为"否"。就像图像中的示例数据一样。最后,对于每个人如果至少有一个是,那么最后是是。我已经手动填写了第一行,但我想为我所有的个人(1000个人)做同样的事情。输入图片描述
library(readxl)
dd <- read_excel("dd.xlsx")
dd <- dput(dd)
structure(list(ID = c(1, 2, 3, 4, 5, 6), m1 = c("2.1", "1.4","NA", "4.0", "2.5", "NA"),
m2 = c("2.8", "1.5", "NA", "4.0", "3.8", "1.1"),
m3 = c("3.5", "1.5", "NA", "4.0", "NA", "1.3"),
m4 = c("NA", "1.8", "1.8", "4.0", "NA", "1.3"),
m5 = c("NA","1.5", "2.9", "NA", "NA", "1.5"),
m6 = c("NA", "NA", "3.5", "NA", "NA", "1.2")),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))
这种方法使用更长的枢轴来估计列,然后使用rename更宽的枢轴将得到您想要的结果(参见第二个解决方案)。但是,如果您只需要Final
列的ID,只需这样做:
- 通过ID获取
Final
的简单方法
dd %>%
pivot_longer(cols = !ID) %>%
mutate(value=as.numeric(value)) %>%
group_by(ID) %>%
summarize(Final = any(value-lag(value)>0.2))
- 扩展方法,如果你真的想要所有的中间变量
dd %>%
pivot_longer(cols = !ID) %>%
mutate(value=as.numeric(value)) %>%
group_by(ID) %>%
mutate(diff=value-lag(value),
gt=if_else(diff>0.2, "Yes", "No")) %>%
pivot_wider(id_cols = ID,names_from = name,values_from = value:gt) %>%
rowwise() %>%
mutate(Final = any(c_across(starts_with("gt"))=="Yes")) %>%
select(!c(diff_m1,gt_m1)) %>%
rename_with(~c("ID", "m1","m2","m3","m4","m5","m6",
"m2-m1", "m3-m2", "m4-m3", "m5-m4", "m6-m5",
"r(m2-m1)", "r(m3-m2)", "r(m4-m3)", "r(m5-m4)", "r(m6-m5)",
"Final"), .cols = everything()
)
输出:
ID m1 m2 m3 m4 m5 m6 `m2-m1` `m3-m2` `m4-m3` `m5-m4` `m6-m5` `r(m2-m1)` `r(m3-m2)` `r(m4-m3)`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
1 1 2.1 2.8 3.5 NA NA NA 0.7 0.7 NA NA NA Yes Yes NA
2 2 1.4 1.5 1.5 1.8 1.5 NA 0.100 0 0.3 -0.3 NA No No Yes
3 3 NA NA NA 1.8 2.9 3.5 NA NA NA 1.1 0.6 NA NA NA
4 4 4 4 4 4 NA NA 0 0 0 NA NA No No No
5 5 2.5 3.8 NA NA NA NA 1.3 NA NA NA NA Yes NA NA
6 6 NA 1.1 1.3 1.3 1.5 1.2 NA 0.2 0 0.2 -0.3 NA No No
# … with 3 more variables: `r(m5-m4)` <chr>, `r(m6-m5)` <chr>, Final <lgl>
这是另一种tidyverse
方法:
library(dplyr)
library(tibble)
dd %>%
type.convert(as.is =TRUE) %>%
select(-1) %>%
mutate(across(-1, .names = "{.col}-{names(.)[match(.col,
names(.))-1]}") - across(-last_col())) %>%
mutate(across(contains("-"), ~ifelse(is.na(.), NA, "YES"), .names = "r({.col})")) %>%
mutate(Final = ifelse(rowSums(dd == "YES") > 0, "No", "Yes")) %>%
add_column(ID = dd$ID, .before = "m1")
ID m1 m2 m3 m4 m5 m6 `m2-m1` `m3-m2` `m4-m3` `m5-m4` `m6-m5` `r(m2-m1)` `r(m3-m2)` `r(m4-m3)` `r(m5-m4)` `r(m6-m5)` Final
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 2.1 2.8 3.5 NA NA NA 0.7 0.7 NA NA NA YES YES NA NA NA Yes
2 2 1.4 1.5 1.5 1.8 1.5 NA 0.100 0 0.3 -0.3 NA YES YES YES YES NA Yes
3 3 NA NA NA 1.8 2.9 3.5 NA NA NA 1.1 0.6 NA NA NA YES YES Yes
4 4 4 4 4 4 NA NA 0 0 0 NA NA YES YES YES NA NA Yes
5 5 2.5 3.8 NA NA NA NA 1.3 NA NA NA NA YES NA NA NA NA Yes
6 6 NA 1.1 1.3 1.3 1.5 1.2 NA 0.2 0 0.2 -0.3 NA YES YES YES YES Yes