r语言 - 如何计算dplyr中多个变量的小数位数



我有一个包含27列数值的 tibble。我想计算小数点后的位数,具体来说,我想知道每一列中出现的最大小数位数。

我尝试先将数值转换为字符串,再截取小数点之后的子串,然后计算该子串的字符数,最后取最大值(max)。

# Asker's attempt: compute the largest number of characters found after the
# decimal point, but only for the single column `value_1`.
afterdecimal_val1 <- data %>%
mutate(
# Convert every numeric column to its character representation.
across(where(is.numeric), as.character),
) %>%
# Evaluate the following mutate() one row at a time.
rowwise() %>%
mutate(
# Split the text of value_1 on a literal "." character.
init = str_split(value_1,"[.]"),
# Take the second piece of the split, presumably the digits after the
# decimal point (relies on how rowwise() exposes list-columns; TODO confirm).
init2 = init[2],
# Count the characters of that piece.
init3 = nchar(init2)
) %>%
# Drop the row-wise grouping so max() below sees the whole column.
ungroup() %>%
# Store the column-wide maximum (ignoring NA) in every row.
mutate(init4 = max(init3, na.rm=TRUE))

不过,这只适用于列 "value_1",而且我很确定这不是最好的方法。

你知道更可行的方法吗?或者:你能帮我升级代码,使其适用于所有27个变量,而不仅仅是"value_1"?

我非常希望能得到一个基于 dplyr 的回答!

谢谢你的帮助!

dplyr 解决方案(由于问题没有提供最小可复现示例 MRE,未经测试):

# dplyr/tidyr approach: for every column whose name starts with "value",
# report the maximum number of characters found after the decimal point.
# Caveat: a value with no decimal point is left unchanged by the gsub() call
# below, so its full text length is counted.
library(dplyr)
library(tidyr)
data %>%
  # Stack all value* columns into (variable, value) pairs.
  pivot_longer(
    starts_with("value"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # The backslash must be doubled inside an R string literal: "\\." is the
  # regex for a literal dot, whereas "\." does not parse.
  mutate(N_digits = nchar(gsub(".*\\.", "", as.character(value)))) %>%
  group_by(variable) %>%
  summarise(max_N_digits = max(N_digits)) %>%
  # Back to one column per original variable, in a single row.
  pivot_wider(names_from = variable, values_from = max_N_digits)

编辑:下面这个版本对没有小数部分的数字也适用(即返回 0):

# Example data: value2 holds only whole numbers, so it has no decimal digits.
data1 <- data.frame(
  value1 = c(1.11, 1.121, 1.1212),
  value2 = c(6666, 5, 5),
  value3 = c(1.1111, 1.121, 1.12111)
)
library(dplyr)
library(tidyr)
data1 %>%
  # Stack all value* columns into (variable, value) pairs.
  pivot_longer(
    starts_with("value"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Two alternatives are removed from the text of each value:
  #   ".*\\."   everything up to and including the decimal point
  #   "^[^.]+$" the whole string when it contains no decimal point
  # The backslash is doubled because "\." is not a valid R string escape.
  mutate(
    N_digits = nchar(gsub(".*\\.|^[^.]+$", "", as.character(value)))
  ) %>%
  group_by(variable) %>%
  summarise(max_N_digits = max(N_digits)) %>%
  # Back to one column per original variable, in a single row.
  pivot_wider(names_from = variable, values_from = max_N_digits)

返回结果:

value1 value2 value3
<int>  <int>  <int>
1      4      0      5

这样可以吗?

# Example data: three double columns with differing numbers of decimals.
df1 <- data.frame(
  c1 = c(1.11, 1.121, 1.1212),
  c2 = c(1.1, 1.121, 1.121),
  c3 = c(1.1111, 1.121, 1.12111)
)

# Maximum number of characters after the decimal point, per column.
#
# df: a data frame; every column is converted with as.character().
# Returns a named integer vector with one element per column of `df`.
max_decimal_digits <- function(df) {
  vapply(
    df,
    function(column) {
      # Strip everything up to and including the first "."; when there is no
      # "." the optional "\\.?" lets the whole string be removed, giving 0.
      # Backslashes are doubled because "\." is not a valid R string escape.
      decimals <- sub("^[^.]*\\.?", "", as.character(column))
      max(nchar(decimals), na.rm = TRUE)
    },
    integer(1)
  )
}

max_decimal_digits(df1)
c1 c2 c3 
4  3  5 

Tidyverse solutions:

library(tidyverse)
# Option 1 using `purrr::`:
# Split the text of each value on the decimal point, keep the piece after it,
# and take the longest such piece per double column.
df1 %>%
  summarise(
    across(
      where(is.double),
      function(column) {
        # "\\." is a literal dot; the backslash must be doubled in R strings.
        pieces <- str_split(as.character(column), "\\.")
        decimals <- map_chr(
          pieces,
          # A value without a decimal point yields a single piece; count it
          # as zero decimals instead of failing on pieces[[2]].
          function(x) if (length(x) > 1L) x[[2]] else ""
        )
        max(str_length(decimals))
      }
    )
  )
# Option 2 using regex:
# Remove everything up to and including the decimal point from the text of
# each value, then take the longest remainder per double column.
df1 %>%
  summarise(
    across(
      where(is.double),
      function(x) {
        # "^[^.]*\\.?" matches the integer part plus an optional literal dot,
        # so a value without decimals is reduced to "" (length 0).
        # Backslashes are doubled because "\." is not a valid R string escape.
        decimals <- str_replace(as.character(x), "^[^.]*\\.?", "")
        max(str_length(decimals))
      }
    )
  )

Base R 解决方案:

# Option 1:
# Resolve the names of the double vectors: double_vecs => character vector
double_vecs <- names(df1)[vapply(df1, is.double, logical(1))]
# Calculate the max number of decimal digits in each column:
# res => named integer vector
res <- vapply(
  # List-style indexing keeps a data frame even when only one column is
  # selected (df1[, double_vecs] would drop to a bare vector).
  df1[double_vecs],
  function(column) {
    # Strip the integer part and the optional literal dot; values without a
    # decimal point become "". Backslashes are doubled because "\." is not a
    # valid R string escape.
    max(nchar(sub("^[^.]*\\.?", "", as.character(column))))
  },
  integer(1)
)
# Option 2:
# Resolve the index of double vectors: col_idx => logical vector
col_idx <- vapply(df1, is.double, logical(1))
# Matrix holding the number of characters after the decimal point for every
# cell of the double columns: len_mat => integer matrix
len_mat <- matrix(
  nchar(
    sub(
      # Strip the integer part and the optional literal dot; values without
      # a decimal point become "". Backslashes are doubled because "\." is
      # not a valid R string escape.
      "^[^.]*\\.?",
      "",
      # List-style indexing keeps a data frame even for a single column.
      as.character(as.matrix(df1[col_idx]))
    )
  ),
  nrow = nrow(df1),
  dimnames = list(NULL, names(df1)[col_idx])
)
# Get the maximum of each column: res => named integer vector
res <- apply(len_mat, 2, max)

最新更新