我想转换这样的数据帧:
mre <- tibble::tribble(
~folder3, ~folder2, ~folder1,
"V3=4", "V2=1", "V1=0",
"V3=5", "V2=1", "V1=0",
"V3=4", "V2=2", "V1=0",
"V3=5", "V2=2", "V1=0",
"V3=4", "V2=1", "V1=1",
"V3=5", "V2=1", "V1=1",
"V3=4", "V2=2", "V1=1",
"V3=5", "V2=2", "V1=1"
)
到此:
folder3 folder2 folder1 V3 V2 V1
V3=4 V2=1 V1=0 4 1 0
V3=5 V2=1 V1=0 5 1 0
V3=4 V2=2 V1=0 4 2 0
V3=5 V2=2 V1=0 5 2 0
V3=4 V2=1 V1=1 4 1 1
V3=5 V2=1 V1=1 5 1 1
V3=4 V2=2 V1=1 4 2 1
V3=5 V2=2 V1=1 5 2 1
基本上提取每个folder?
列的唯一变量名(此处为"V3"、"V2"、"V1",但可以是任何有效名称,如"a"、"b"、"c"(作为新列名,并将值保持在适当位置。
我有一个单独的";文件夹";列使用第一行值:
mre %>%
tidyr::extract(folder1, into = .$folder1[1] |> word(1, sep="="), "\S+=(\d+)", remove = FALSE)
但我不知道如何扩展到多个";文件夹";列(数字不是固定的(。我试着根据这里的答案使用map
,但不知道如何从第一行获得变量名。
有什么建议吗?
我们可以在across
本身中创建新列,而不是extract
-mutate
across
所有列(everything()
(,使用str_extract
获取=
后面的数字(\d+
(,同时使用str_replace
修改names
中的列名
library(dplyr)
library(stringr)
mre %>%
mutate(across(everything(),
~ as.numeric(str_extract(., "(?<=\=)\d+")),
.names = "{str_replace(.col, 'folder', 'V')}"))
-输出
# A tibble: 8 × 6
folder3 folder2 folder1 V3 V2 V1
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 V3=4 V2=1 V1=0 4 1 0
2 V3=5 V2=1 V1=0 5 1 0
3 V3=4 V2=2 V1=0 4 2 0
4 V3=5 V2=2 V1=0 5 2 0
5 V3=4 V2=1 V1=1 4 1 1
6 V3=5 V2=1 V1=1 5 1 1
7 V3=4 V2=2 V1=1 4 2 1
8 V3=5 V2=2 V1=1 5 2 1
如果列名需要从"文件夹"列值中提取,那么一个选项是用pivot_longer
将列重塑为"长",用separate_rows
将列拆分,用pivot_wider
将列重塑回"宽",并用原始数据绑定列
library(tidyr)
mre %>%
mutate(rn = row_number()) %>%
pivot_longer(cols= starts_with('folder'), names_to = NULL) %>%
separate(value, into = c('name', 'value'), sep="\=", convert = TRUE) %>%
pivot_wider(names_from = name, values_from = value) %>%
select(-rn) %>%
bind_cols(mre, .)
-输出
# A tibble: 8 × 6
folder3 folder2 folder1 V3 V2 V1
<chr> <chr> <chr> <int> <int> <int>
1 V3=4 V2=1 V1=0 4 1 0
2 V3=5 V2=1 V1=0 5 1 0
3 V3=4 V2=2 V1=0 4 2 0
4 V3=5 V2=2 V1=0 5 2 0
5 V3=4 V2=1 V1=1 4 1 1
6 V3=5 V2=1 V1=1 5 1 1
7 V3=4 V2=2 V1=1 4 2 1
8 V3=5 V2=2 V1=1 5 2 1
基本R选项
cbind(
mre,
unclass(
xtabs(
V2 ~ id + factor(V1, levels = unique(V1)),
do.call(
rbind,
Map(function(x) cbind(read.table(text = x, sep = "="), id = seq_along(x)), mre)
)
)
)
)
给出
folder3 folder2 folder1 V3 V2 V1
1 V3=4 V2=1 V1=0 4 1 0
2 V3=5 V2=1 V1=0 5 1 0
3 V3=4 V2=2 V1=0 4 2 0
4 V3=5 V2=2 V1=0 5 2 0
5 V3=4 V2=1 V1=1 4 1 1
6 V3=5 V2=1 V1=1 5 1 1
7 V3=4 V2=2 V1=1 4 2 1
8 V3=5 V2=2 V1=1 5 2 1
代码分解
Map(..., mre)
> Map(function(x) cbind(read.table(text = x, sep = "="), id = seq_along(x)), mre)
$folder3
V1 V2 id
1 V3 4 1
2 V3 5 2
3 V3 4 3
4 V3 5 4
5 V3 4 5
6 V3 5 6
7 V3 4 7
8 V3 5 8
$folder2
V1 V2 id
1 V2 1 1
2 V2 1 2
3 V2 2 3
4 V2 2 4
5 V2 1 5
6 V2 1 6
7 V2 2 7
8 V2 2 8
$folder1
V1 V2 id
1 V1 0 1
2 V1 0 2
3 V1 0 3
4 V1 0 4
5 V1 1 5
6 V1 1 6
7 V1 1 7
8 V1 1 8
do.call(rbind, ...)
> do.call(
+ rbind,
+ Map(function(x) cbind(read.table(text = x, sep = "="), id = seq_along(x)), mre)
+ )
V1 V2 id
folder3.1 V3 4 1
folder3.2 V3 5 2
folder3.3 V3 4 3
folder3.4 V3 5 4
folder3.5 V3 4 5
folder3.6 V3 5 6
folder3.7 V3 4 7
folder3.8 V3 5 8
folder2.1 V2 1 1
folder2.2 V2 1 2
folder2.3 V2 2 3
folder2.4 V2 2 4
folder2.5 V2 1 5
folder2.6 V2 1 6
folder2.7 V2 2 7
folder2.8 V2 2 8
folder1.1 V1 0 1
folder1.2 V1 0 2
folder1.3 V1 0 3
folder1.4 V1 0 4
folder1.5 V1 1 5
folder1.6 V1 1 6
folder1.7 V1 1 7
folder1.8 V1 1 8
xtabs(..., ...)
> xtabs(
+ V2 ~ id + factor(V1, levels = unique(V1)),
+ do.call(
+ rbind,
+ Map(function(x) cbind(read.table(text = x, sep = "="), id = se .... [TRUNCATED]
factor(V1, levels = unique(V1))
id V3 V2 V1
1 4 1 0
2 5 1 0
3 4 2 0
4 5 2 0
5 4 1 1
6 5 1 1
7 4 2 1
8 5 2 1
这里有另一个可以使用的解决方案:
library(dplyr)
library(stringr)
library(purrr)
mre %>%
bind_cols(
setNames(seq_len(ncol(mre)) %>%
map_dfc(~ {y <- unname(unlist(mre[.x]))
as.integer(str_remove(y, "\w+="))}), paste0("V", 1:ncol(mre)))
)
# A tibble: 8 x 6
folder3 folder2 folder1 V1 V2 V3
<chr> <chr> <chr> <int> <int> <int>
1 V3=4 V2=1 V1=0 4 1 0
2 V3=5 V2=1 V1=0 5 1 0
3 V3=4 V2=2 V1=0 4 2 0
4 V3=5 V2=2 V1=0 5 2 0
5 V3=4 V2=1 V1=1 4 1 1
6 V3=5 V2=1 V1=1 5 1 1
7 V3=4 V2=2 V1=1 4 2 1
8 V3=5 V2=2 V1=1 5 2 1