我有下面这个矩阵 df(抱歉内容较长):
> dput(df)
structure(c(0, 0, 0.0574480382106154, 0.0800008705393165, 0.0647566062852685,
0.112274391355324, 0.0402639870538404, 0.0553674479763372, 0.0235362130535557,
0.0304701915216926, 0.015957378085698, 0.0259097608149485, 0.00657179802491289,
0.0691642536016694, 0.0274399007093234, 0.00201941365763517,
0, 0.0525198942244426, 0.0994410634344558, 0.0794991280269763,
0.117577302791196, 0.0647283327792206, 0.0503713626773954, 0.0309304830926735,
0.0275762865389375, 0.0216024416811812, 0.0413924289837959, 0.0238831560356344,
0.0605761765047992, 0.0120734733332854, 0.000730020441789363,
0, 0.0598673784821097, 0.130252390852172, 0.087705344115898,
0.131735911815302, 0.02673293529901, 0.0680397854502604, 0.0370463821795181,
0.0471866644226974, 0.0242502641618046, 0.0223128283774105, 0.00320295698962381,
0.0854472401715233, 0.0172538567096209, 0, 0, 0.0693201770743328,
0.106232635645764, 0.0771739973477795, 0.143666981967145, 0.0830532566270858,
0.084725218273522, 0.0279720831185308, 0.0343308898371373, 0.0194570662731681,
0.0330292790488443, 0.0147222990487519, 0.0687246321182713, 0.0501449834879802,
0, 0, 0.0547104289387588, 0.0931951601799034, 0.0889215738111778,
0.121114316483843, 0.082857169908356, 0.0502836748768972, 0.0391059132357561,
0.0424567886056074, 0.0234850554527653, 0.0508665659011156, 0.00822313454289805,
0.0718873290749932, 0.0237327861391292, 0.00792378473591672,
0, 0.0647299636542528, 0.0823013000090186, 0.103643490503831,
0.164563705863324, 0.0857449582769459, 0.100588388374397, 0.0386571819314608,
0.0441666572455667, 0.0270439097770377, 0.0391881679720457, 0.0276437742288209,
0.0957749771131506, 0.0728416538347003, 0.000602586230603257,
0, 0.0899483611807026, 0.119156671504329, 0.126002902890919,
0.189807707317703, 0.0840968961494906, 0.12713870456671, 0.0609388549685219,
0.0513967955718858, 0.0306884323332694, 0.0619828623410178, 0.0117126582962684,
0.137116591284813, 0.0588476614117485, 0.0112014562895064, 0,
0.0864225783049971, 0.11013673763546, 0.13167372732329, 0.219265913810585,
0.0661684619610171, 0.119336146518705, 0.0702378729013129, 0.0663056609650812,
0.0287711787851528, 0.0619649813475264, 0.017198478793494, 0.095900469942417,
0.0543248861892825), .Dim = c(15L, 8L), .Dimnames = list(c("0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14"), c("V25", "V26", "V27", "V28", "V29", "V30", "V31",
"V32")))
我想对每一行应用一个阈值函数,使每个高于该行标准差的值变为1,否则变为0。我试过:
# Threshold a numeric vector against its own standard deviation: the intent is
# 0 for values below sd(x) and 1 for values above it.
cutoff <- function(x) {
x[x < sd(x)] <- 0
# NOTE: sd(x) is re-evaluated here on the already-modified x (some entries were
# just set to 0), so this threshold can differ from the one used on the line
# above. A value lying between the two thresholds matches neither test and is
# returned unchanged.
x[x > sd(x)] <- 1
return(x)
}
# apply() over rows returns one column per input row, hence the transpose.
df.thresh <- t(apply(df, 1, cutoff))
这看起来可行,但由于某种原因,有些值没有被改变,例如本例中的 df.thresh[15,5]。
这是得到的 df.thresh:
> dput(df.thresh)
structure(c(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0.0237327861391292,
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), .Dim = c(15L, 8L), .Dimnames = list(c("0", "1", "2",
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"
), c("V25", "V26", "V27", "V28", "V29", "V30", "V31", "V32")))
你失败的原因在这里
x[x < sd(x)] <- 0
x[x > sd(x)] <- 1
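你是依次(而不是同时)更新 x 中的值的:第一行执行完后 x 已经被修改,因此第二行的 sd(x) 不再是基于原始 x 计算的标准差。
以第15行为例:原始标准差约为 0.0222,把较小的值置 0 之后重新计算得到的标准差约为 0.0273,而 0.0237 恰好落在两者之间,所以两个条件都不满足,该值保持不变。
你应该先把 sd(x) 计算出来并保存,例如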
# Binarise a numeric vector against its own standard deviation.
#
# x: numeric vector (e.g. one row of a matrix passed in by apply()).
# Returns a numeric vector of the same length and names as x, holding 1 where
# the original value is strictly greater than sd(x) and 0 otherwise. If sd(x)
# is NA (x contains NA or has fewer than two elements) every result is NA.
cutoff <- function(x) {
  # Compute the threshold once, from the unmodified input, so that both
  # outcomes are decided against the same value.
  th <- sd(x)
  # A single comparison classifies every element; values exactly equal to the
  # threshold become 0 instead of being left untouched.
  # `x[] <-` keeps the names/attributes of x.
  x[] <- as.numeric(x > th)
  x
}
应该适合你的情况。
试试下面的代码
> +(df > apply(df, 1, sd))
V25 V26 V27 V28 V29 V30 V31 V32
0 0 0 0 0 0 1 0 1
1 0 0 0 0 0 0 0 0
2 1 1 1 1 1 1 1 1
3 1 1 1 1 1 1 1 1
4 1 1 1 1 1 1 1 1
5 1 1 1 1 1 1 1 1
6 1 1 1 1 1 1 1 1
7 1 1 1 1 1 1 1 1
8 1 1 1 1 1 1 1 1
9 1 1 1 1 1 1 1 1
10 1 1 1 1 1 1 1 1
11 1 1 1 1 1 1 1 1
12 0 1 0 1 0 1 1 1
13 1 1 1 1 1 1 1 1
14 1 0 0 1 1 1 1 1
或
# Vectorised threshold: compare every element with sd(x) in one step and turn
# the logical result into 0/1 with unary plus.
# Note that `>=` maps values equal to sd(x) to 1 (e.g. an all-zero row, whose
# sd is 0, becomes all 1), whereas a strict `>` would map them to 0.
cutoff <- function(x) {
+(x >= sd(x))
}
> t(apply(df, 1, cutoff))
V25 V26 V27 V28 V29 V30 V31 V32
0 0 0 0 0 0 1 0 1
1 1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1
3 1 1 1 1 1 1 1 1
4 1 1 1 1 1 1 1 1
5 1 1 1 1 1 1 1 1
6 1 1 1 1 1 1 1 1
7 1 1 1 1 1 1 1 1
8 1 1 1 1 1 1 1 1
9 1 1 1 1 1 1 1 1
10 1 1 1 1 1 1 1 1
11 1 1 1 1 1 1 1 1
12 0 1 0 1 0 1 1 1
13 1 1 1 1 1 1 1 1
14 1 0 0 1 1 1 1 1