r语言 - cor()中方法的默认值



我使用GGally::ggscatmat来生成相关矩阵的图。帮助文件?ggscatmat中说它调用cor()来计算相关性,但没有列出method的默认值。于是我开始查看cor,想弄清楚在未指定method时使用的默认值是什么。

?cor列出的用法是cor(x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman")),为了弄清这一点,我试着去理解cor函数本身的实现:

# Printed source of cor(): correlation between the columns of `x`, or between
# `x` and `y`.
#
# Arguments:
#   x, y   - numeric or logical vectors/matrices; data frames are converted
#            with as.matrix(). `y` may be NULL only if `x` is matrix-like.
#   use    - NA-handling policy, partially matched against the five names
#            listed in the pmatch() call below.
#   method - one of "pearson", "kendall", "spearman". When the caller does not
#            supply it, the default is the whole choices vector and
#            match.arg() resolves that to its first element, "pearson".
#
# Returns whatever the C routine C_cor yields, or, for pairwise-complete
# Kendall/Spearman, a matrix `r` assembled in R (dropped to a vector/scalar
# when neither input was a matrix).
function (x, y = NULL, use = "everything", method = c("pearson", 
"kendall", "spearman")) 
{
# na.method is the integer position of `use` in this vector:
# 1 = all.obs, 2 = complete.obs, 3 = pairwise.complete.obs,
# 4 = everything, 5 = na.or.complete. pmatch() gives NA on no/ambiguous match.
na.method <- pmatch(use, c("all.obs", "complete.obs", 
"pairwise.complete.obs", "everything", "na.or.complete"))
if (is.na(na.method)) 
stop("invalid 'use' argument")
# This is where the default is decided: with `method` left at its default
# vector, match.arg() returns the first choice ("pearson").
method <- match.arg(method)
if (is.data.frame(y)) 
y <- as.matrix(y)
if (is.data.frame(x)) 
x <- as.matrix(x)
if (!is.matrix(x) && is.null(y)) 
stop("supply both 'x' and 'y' or a matrix-like 'x'")
if (!(is.numeric(x) || is.logical(x))) 
stop("'x' must be numeric")
stopifnot(is.atomic(x))
if (!is.null(y)) {
if (!(is.numeric(y) || is.logical(y))) 
stop("'y' must be numeric")
stopifnot(is.atomic(y))
}
# Local helper: replace values by their ranks, leaving NAs as NA.
# Empty input is returned unchanged; a matrix with more than one row is ranked
# column by column; a one-row matrix yields row(u).
Rank <- function(u) {
if (length(u) == 0L) 
u
else if (is.matrix(u)) {
if (nrow(u) > 1L) 
apply(u, 2L, rank, na.last = "keep")
else row(u)
}
else rank(u, na.last = "keep")
}
# Pearson: hand the raw data straight to the C routine, last argument FALSE.
if (method == "pearson") 
.Call(C_cor, x, y, na.method, FALSE)
# Kendall/Spearman with use = "complete.obs" or "na.or.complete":
# remove rows containing NA first, then rank what is left.
else if (na.method %in% c(2L, 5L)) {
if (is.null(y)) {
# Last argument is TRUE only for "kendall"; for "spearman" the ranks are
# passed with FALSE, the same flag value the pearson call uses.
.Call(C_cor, Rank(na.omit(x)), NULL, na.method, method == 
"kendall")
}
else {
# Row indices having an NA in either x or y (NULL when there are none).
nas <- attr(na.omit(cbind(x, y)), "na.action")
# Drop those rows from a matrix or vector; no-op when `nas` is empty.
dropNA <- function(x, nas) {
if (length(nas)) {
if (is.matrix(x)) 
x[-nas, , drop = FALSE]
else x[-nas]
}
else x
}
.Call(C_cor, Rank(dropNA(x, nas)), Rank(dropNA(y, 
nas)), na.method, method == "kendall")
}
}
# Kendall/Spearman with use = "all.obs" or "everything" (na.method 1 or 4):
# rank the inputs as they are and let the C routine apply the NA policy.
else if (na.method != 3L) {
x <- Rank(x)
if (!is.null(y)) 
y <- Rank(y)
.Call(C_cor, x, y, na.method, method == "kendall")
}
# Kendall/Spearman with use = "pairwise.complete.obs": handled in R, one
# pair of columns at a time, because the complete cases differ per pair.
else {
if (is.null(y)) {
ncy <- ncx <- ncol(x)
if (ncx == 0) 
stop("'x' is empty")
r <- matrix(0, nrow = ncx, ncol = ncy)
# Fill only the lower triangle including the diagonal (j <= i).
for (i in seq_len(ncx)) {
for (j in seq_len(i)) {
x2 <- x[, i]
y2 <- x[, j]
# Keep only rows where both columns of this pair are observed.
ok <- complete.cases(x2, y2)
x2 <- rank(x2[ok])
y2 <- rank(y2[ok])
# NA when the pair has no complete rows; otherwise call C_cor with
# na.method fixed to 1L.
r[i, j] <- if (any(ok)) 
.Call(C_cor, x2, y2, 1L, method == "kendall")
else NA
}
}
# Mirror the lower triangle into the upper one; the diagonal would be
# counted twice by r + t(r), so subtract it once.
r <- r + t(r) - diag(diag(r))
rownames(r) <- colnames(x)
colnames(r) <- colnames(x)
r
}
else {
if (length(x) == 0L || length(y) == 0L) 
stop("both 'x' and 'y' must be non-empty")
# Remember whether the caller passed a matrix, so the result shape can be
# restored after vectors are promoted to one-column matrices.
matrix_result <- is.matrix(x) || is.matrix(y)
if (!is.matrix(x)) 
x <- matrix(x, ncol = 1L)
if (!is.matrix(y)) 
y <- matrix(y, ncol = 1L)
ncx <- ncol(x)
ncy <- ncol(y)
r <- matrix(0, nrow = ncx, ncol = ncy)
# Every column of x against every column of y.
for (i in seq_len(ncx)) {
for (j in seq_len(ncy)) {
x2 <- x[, i]
y2 <- y[, j]
ok <- complete.cases(x2, y2)
x2 <- rank(x2[ok])
y2 <- rank(y2[ok])
r[i, j] <- if (any(ok)) 
.Call(C_cor, x2, y2, 1L, method == "kendall")
else NA
}
}
rownames(r) <- colnames(x)
colnames(r) <- colnames(y)
# Two plain vectors in: return the result with its dimensions dropped.
if (matrix_result) 
r
else drop(r)
}
}
}
<bytecode: 0x0000024e4d4e22b0>
<environment: namespace:stats>

不幸的是,这远远超出了我对R的理解能力。谁能解释一下,在没有指定method的情况下,cor是如何决定使用哪种方法的?

TLDR:使用列出的第一个,即"pearson"。


更多信息:

上面打印出的cor函数源码的第8行是method <- match.arg(method)。

这意味着如果用户指定method="something",则使用"something"

但是,如果用户没有指定method参数,则使用默认的method = c("pearson", "kendall", "spearman")。但你会问,是哪一个?这里列出了3个!答案在于match.arg函数是如何工作的。参见?match.arg,其中说明:当参数保持为默认的候选向量时,match.arg返回其中的第一个元素,这里即"pearson"。

在这种情况下,如果调用cor(x, y)而不指定method参数,则它与cor(x, y, method="pearson")相同。@missuse在顶部的评论中提供了一个示例,我复制到了这里:

# Reproducible demo: cor() without `method` equals cor(method = "pearson").
set.seed(123)
x <- rnorm(100)
y <- rnorm(100)

# Compute both correlations separately, then compare them.
default_cor <- cor(x, y)
pearson_cor <- cor(x, y, method = "pearson")
all.equal(default_cor, pearson_cor)

最新更新