我有一个包含27列数值的 tibble。我想计算小数点后的位数,特别是想知道每一列中可能出现的最大小数位数。
我尝试先将数值转换为字符串,再截取小数点之后的子串,然后计算该子串的字符数,最后求最大值(max)。
# Asker's attempt: count the characters after the decimal point of `value_1`
# and store the largest count found in the column.
afterdecimal_val1 <- data %>%
# Turn every numeric column into its character representation.
mutate(
across(where(is.numeric), as.character),
) %>%
# Work one row at a time so the split result can be indexed per value.
rowwise() %>%
mutate(
# Split the text of `value_1` on a literal dot ("[.]").
init = str_split(value_1,"[.]"),
# Second piece of the split, i.e. the text after the decimal point
# (presumably NA when the value has no decimal point -- TODO confirm).
init2 = init[2],
# Number of characters in that piece.
init3 = nchar(init2)
) %>%
ungroup() %>%
# Largest character count over all rows, ignoring NA entries.
mutate(init4 = max(init3, na.rm=TRUE))
好吧,这只适用于列"value_1"我很确定这不是最好的方法。
你知道更可行的方法吗?或者:你能帮我升级代码,使其适用于所有27个变量,而不仅仅是"value_1"?
我更希望得到使用 dplyr 的解答!
谢谢你的帮助!
dplyr
解决方案(由于问题中没有提供最小可复现示例 MRE,以下代码未经测试):
library(dplyr)
library(tidyr)
# Maximum number of digits after the decimal point for every `value*` column.
# Reshape to long format, strip everything up to and including the decimal
# point from each value's character form, count what is left, take the
# maximum per original column, then reshape back to one column per variable.
data %>%
  pivot_longer(
    starts_with("value"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # The dot must be written "\\." inside an R string: a single backslash
  # ("\.") is an unrecognised escape and the code would not parse.
  # Values without a decimal point are left untouched by this pattern, so
  # their full length is counted.
  mutate(N_digits = nchar(gsub(".*\\.", "", as.character(value)))) %>%
  group_by(variable) %>%
  summarise(max_N_digits = max(N_digits)) %>%
  pivot_wider(names_from = variable, values_from = max_N_digits)
编辑:这个版本应该也适用于没有小数点的数字(即返回 0):
# Example data: `value2` holds whole numbers only.
data1 <- data.frame(
  value1 = c(1.11, 1.121, 1.1212),
  value2 = c(6666, 5, 5),
  value3 = c(1.1111, 1.121, 1.12111)
)
library(dplyr)
library(tidyr)
# Maximum number of digits after the decimal point for every `value*` column.
data1 %>%
  pivot_longer(
    starts_with("value"),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Two alternatives are removed from the character form of each value:
  #   ".*\\."    everything up to and including the decimal point
  #   "^[^.]+$"  the whole string when it contains no decimal point
  # so whole numbers end up as "" and count as 0 digits.
  # The dot has to be escaped as "\\." in an R string; "\." does not parse.
  mutate(
    N_digits = nchar(gsub(".*\\.|^[^.]+$", "", as.character(value)))
  ) %>%
  group_by(variable) %>%
  summarise(max_N_digits = max(N_digits)) %>%
  pivot_wider(names_from = variable, values_from = max_N_digits)
返回结果:
value1 value2 value3
<int> <int> <int>
1 4 0 5
这样可以吗?
# Example data used by the snippets below.
df1 <- data.frame(
  c1 = c(1.11, 1.121, 1.1212),
  c2 = c(1.1, 1.121, 1.121),
  c3 = c(1.1111, 1.121, 1.12111)
)

# Maximum number of digits after the decimal point in each column of `df`.
#
# Each column is converted with as.character(); everything up to and
# including the decimal point is removed (or the whole string when there is
# no decimal point), and the longest remainder per column is reported.
# Returns an integer vector named after the columns of `df`.
max_decimal_places <- function(df) {
  vapply(
    df,
    # Backslashes are doubled because this is an R string literal; the
    # pattern also copes with multi-digit integer parts and whole numbers.
    function(x) max(nchar(gsub(".*\\.|^[^.]+$", "", as.character(x)))),
    integer(1)
  )
}

max_decimal_places(df1)
c1 c2 c3
4 3 5
Tidyverse solutions:
library(tidyverse)
# Option 1 using `purrr::`:
# For every double column: split the character form of each value on the
# decimal point, keep the part after it, and report the longest such part.
df1 %>%
  summarise(
    across(
      where(is.double),
      ~ max(
        str_length(
          map_chr(
            # "\\." is a literal dot; a single backslash does not parse in R.
            str_split(as.character(.x), "\\."),
            # Values without a decimal point split into one piece only;
            # treat them as having no decimals instead of failing on [[2]].
            function(parts) if (length(parts) > 1) parts[[2]] else ""
          )
        )
      )
    )
  )
# Option 2 using regex:
# For every double column: delete everything up to and including the decimal
# point (or the whole string when there is none) and report the longest
# remainder.
df1 %>%
  summarise(
    across(
      where(is.double),
      function(x) {
        max(
          str_length(
            # Backslashes are doubled because this is an R string literal;
            # "\." is an unrecognised escape and would not parse.
            str_replace(as.character(x), ".*\\.|^[^.]+$", "")
          )
        )
      }
    )
  )
基础解决方案:
# Option 1:
# Resolve the name double vectors: double_vecs => character vector
double_vecs <- names(df1)[vapply(df1, is.double, logical(1))]
# Calculate the max number of decimal places in each double column:
# res => named integer vector
# `df1[double_vecs]` keeps a data frame even when only one column is
# selected, so vapply() always iterates over columns and keeps their names.
res <- vapply(
  df1[double_vecs],
  function(x) {
    # Remove everything up to and including the decimal point, or the whole
    # string when there is none; backslashes are doubled for the R string.
    max(nchar(gsub(".*\\.|^[^.]+$", "", as.character(x))))
  },
  integer(1)
)
# Option 2:
# Resolve the index of double vectors: col_idx => logical vector
col_idx <- vapply(df1, is.double, logical(1))
# Matrix holding the number of characters after the decimal point,
# one column per double column of df1: len_mat => integer matrix
len_mat <- do.call(
  cbind,
  lapply(
    # List-style subsetting keeps a data frame even for a single column.
    df1[col_idx],
    function(x) {
      # Remove everything up to and including the decimal point, or the
      # whole string when there is none; "\\." is a literal dot.
      nchar(gsub(".*\\.|^[^.]+$", "", as.character(x)))
    }
  )
)
# Get the maximum of each column: res => named integer vector
# After transposing, every data-frame column is one observation, so the
# parallel maximum across them yields one value per original column.
res <- setNames(
  do.call(pmax, as.data.frame(t(len_mat))),
  names(df1)[col_idx]
)