对r中的矩阵应用逐行阈值函数



我有下面这个矩阵(变量名为 df,抱歉篇幅较长):

> dput(df)
structure(c(0, 0, 0.0574480382106154, 0.0800008705393165, 0.0647566062852685, 
0.112274391355324, 0.0402639870538404, 0.0553674479763372, 0.0235362130535557, 
0.0304701915216926, 0.015957378085698, 0.0259097608149485, 0.00657179802491289, 
0.0691642536016694, 0.0274399007093234, 0.00201941365763517, 
0, 0.0525198942244426, 0.0994410634344558, 0.0794991280269763, 
0.117577302791196, 0.0647283327792206, 0.0503713626773954, 0.0309304830926735, 
0.0275762865389375, 0.0216024416811812, 0.0413924289837959, 0.0238831560356344, 
0.0605761765047992, 0.0120734733332854, 0.000730020441789363, 
0, 0.0598673784821097, 0.130252390852172, 0.087705344115898, 
0.131735911815302, 0.02673293529901, 0.0680397854502604, 0.0370463821795181, 
0.0471866644226974, 0.0242502641618046, 0.0223128283774105, 0.00320295698962381, 
0.0854472401715233, 0.0172538567096209, 0, 0, 0.0693201770743328, 
0.106232635645764, 0.0771739973477795, 0.143666981967145, 0.0830532566270858, 
0.084725218273522, 0.0279720831185308, 0.0343308898371373, 0.0194570662731681, 
0.0330292790488443, 0.0147222990487519, 0.0687246321182713, 0.0501449834879802, 
0, 0, 0.0547104289387588, 0.0931951601799034, 0.0889215738111778, 
0.121114316483843, 0.082857169908356, 0.0502836748768972, 0.0391059132357561, 
0.0424567886056074, 0.0234850554527653, 0.0508665659011156, 0.00822313454289805, 
0.0718873290749932, 0.0237327861391292, 0.00792378473591672, 
0, 0.0647299636542528, 0.0823013000090186, 0.103643490503831, 
0.164563705863324, 0.0857449582769459, 0.100588388374397, 0.0386571819314608, 
0.0441666572455667, 0.0270439097770377, 0.0391881679720457, 0.0276437742288209, 
0.0957749771131506, 0.0728416538347003, 0.000602586230603257, 
0, 0.0899483611807026, 0.119156671504329, 0.126002902890919, 
0.189807707317703, 0.0840968961494906, 0.12713870456671, 0.0609388549685219, 
0.0513967955718858, 0.0306884323332694, 0.0619828623410178, 0.0117126582962684, 
0.137116591284813, 0.0588476614117485, 0.0112014562895064, 0, 
0.0864225783049971, 0.11013673763546, 0.13167372732329, 0.219265913810585, 
0.0661684619610171, 0.119336146518705, 0.0702378729013129, 0.0663056609650812, 
0.0287711787851528, 0.0619649813475264, 0.017198478793494, 0.095900469942417, 
0.0543248861892825), .Dim = c(15L, 8L), .Dimnames = list(c("0", 
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", 
"13", "14"), c("V25", "V26", "V27", "V28", "V29", "V30", "V31", 
"V32")))

我想对每一行应用阈值函数,使每个高于该行标准差的值变为 1,否则变为 0。我试过:

# Question's original attempt: binarise a numeric vector against its own
# standard deviation (values below it -> 0, values above it -> 1).
cutoff <- function(x) {
# Zero out every element that is below the standard deviation of x.
x[x < sd(x)] <- 0
# NOTE: sd(x) is evaluated again here, on the x already modified by the line
# above, so this threshold can differ from the one used for the zeroing step.
x[x > sd(x)] <- 1
return(x)
} 
# Run cutoff on every row (MARGIN = 1); apply() returns each row's result as a
# column, so t() restores the original row-by-column layout.
df.thresh <- t(apply(df, 1, cutoff))

这看起来可行,但不知为何有些值没有被改变,例如本例中的 df.thresh[15,5]。

这是结果df.thresh

> dput(df.thresh)
structure(c(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0.0237327861391292, 
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1), .Dim = c(15L, 8L), .Dimnames = list(c("0", "1", "2", 
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"
), c("V25", "V26", "V27", "V28", "V29", "V30", "V31", "V32")))

你的代码出错的原因在这两行:

# First step: elements below sd(x) are overwritten with 0, which changes x.
x[x < sd(x)] <- 0
# Second step: sd(x) is recomputed from the modified x, not from the original.
x[x > sd(x)] <- 1

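你是分两步依次修改 x 的,而不是一次性同时更新。这样一来,第二行中的 sd(x) 是基于已被第一行修改过的 x 计算的,不再是原始 x 的标准差。例如第 15 行:原始标准差约为 0.0222,第一行把两个较小的值置为 0 之后,标准差变为约 0.0273;而 0.0237 既不小于前者也不大于后者,所以保持原值不变。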

因此,您应该使用另一个变量来保存sd(x),例如
# Binarise a numeric vector against its own standard deviation.
#
# x: numeric vector (for example one matrix row handed over by apply()).
#
# Returns a vector with the same length and names as x that holds 1 where the
# element is strictly greater than sd(x) and 0 everywhere else. If sd(x) is NA
# (x contains NA or has fewer than two elements) every element of the result
# is NA.
cutoff <- function(x) {
  # Compute the threshold exactly once, from the untouched input, so that the
  # comparison below never sees a standard deviation of partially updated data.
  th <- sd(x)
  # Build the 0/1 result in a single step. Doing it as two separate
  # assignments (x < th, then x > th) would leave an element that is exactly
  # equal to th at its original value; here it becomes 0 ("otherwise").
  # Assigning through x[] keeps the names of x.
  x[] <- as.numeric(x > th)
  return(x)
}

应该适合你的情况。


试试下面的代码

> +(df > apply(df, 1, sd))
V25 V26 V27 V28 V29 V30 V31 V32
0    0   0   0   0   0   1   0   1
1    0   0   0   0   0   0   0   0
2    1   1   1   1   1   1   1   1
3    1   1   1   1   1   1   1   1
4    1   1   1   1   1   1   1   1
5    1   1   1   1   1   1   1   1
6    1   1   1   1   1   1   1   1
7    1   1   1   1   1   1   1   1
8    1   1   1   1   1   1   1   1
9    1   1   1   1   1   1   1   1
10   1   1   1   1   1   1   1   1
11   1   1   1   1   1   1   1   1
12   0   1   0   1   0   1   1   1
13   1   1   1   1   1   1   1   1
14   1   0   0   1   1   1   1   1

# Flag the elements of a numeric vector that reach its standard deviation.
#
# x: numeric vector (for example one matrix row handed over by apply()).
#
# Returns an integer vector with the names of x: 1L where the element is
# greater than or equal to sd(x), 0L otherwise. The comparison is inclusive,
# so a vector whose elements are all identical (sd of 0) maps to all 1L.
cutoff <- function(x) {
  threshold <- sd(x)
  at_or_above <- x >= threshold
  # Multiplying by 1L turns the logical mask into integers, keeping names.
  at_or_above * 1L
}
> t(apply(df, 1, cutoff))
V25 V26 V27 V28 V29 V30 V31 V32
0    0   0   0   0   0   1   0   1
1    1   1   1   1   1   1   1   1
2    1   1   1   1   1   1   1   1
3    1   1   1   1   1   1   1   1
4    1   1   1   1   1   1   1   1
5    1   1   1   1   1   1   1   1
6    1   1   1   1   1   1   1   1
7    1   1   1   1   1   1   1   1
8    1   1   1   1   1   1   1   1
9    1   1   1   1   1   1   1   1
10   1   1   1   1   1   1   1   1
11   1   1   1   1   1   1   1   1
12   0   1   0   1   0   1   1   1
13   1   1   1   1   1   1   1   1
14   1   0   0   1   1   1   1   1

最新更新