我正在尝试使用 arules 包中的 discretize 函数对变量进行离散化,但输出的标签很不直观。有人能建议如何把这些标签转换成类似“低”、“中等”、“高”的形式,或者简单地转换成 1、2、3 吗?
library(arules)
#> Warning: package 'arules' was built under R version 3.6.3
#> Loading required package: Matrix
#>
#> Attaching package: 'arules'
#> The following objects are masked from 'package:base':
#>
#> abbreviate, write
discretize(iris[,1], breaks = 3)
#> [1] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4)
#> [8] [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [15] [5.4,6.3) [5.4,6.3) [5.4,6.3) [4.3,5.4) [5.4,6.3) [4.3,5.4) [5.4,6.3)
#> [22] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [29] [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4) [5.4,6.3) [4.3,5.4)
#> [36] [4.3,5.4) [5.4,6.3) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [43] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [50] [4.3,5.4) [6.3,7.9] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9] [5.4,6.3)
#> [57] [6.3,7.9] [4.3,5.4) [6.3,7.9] [4.3,5.4) [4.3,5.4) [5.4,6.3) [5.4,6.3)
#> [64] [5.4,6.3) [5.4,6.3) [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [71] [5.4,6.3) [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [78] [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [85] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [92] [5.4,6.3) [5.4,6.3) [4.3,5.4) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [99] [4.3,5.4) [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [106] [6.3,7.9] [4.3,5.4) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [113] [6.3,7.9] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [120] [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [127] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [134] [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9]
#> [141] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [148] [6.3,7.9] [5.4,6.3) [5.4,6.3)
#> attr(,"discretized:breaks")
#> [1] 4.3 5.4 6.3 7.9
#> attr(,"discretized:method")
#> [1] frequency
#> Levels: [4.3,5.4) [5.4,6.3) [6.3,7.9]
table(discretize(iris[,1], breaks = 3))
#>
#> [4.3,5.4) [5.4,6.3) [6.3,7.9]
#> 46 53 51
如果我没有理解错您的目标,您可以使用 base R 自带的 cut 函数完成同样的事情。例如:
# cut() defaults to right-closed intervals that exclude the lowest break, so
# the minimum value (4.3) would become NA and boundary values would fall into
# different bins than discretize(). right = FALSE with include.lowest = TRUE
# reproduces discretize()'s bins: [4.3,5.4) [5.4,6.3) [6.3,7.9].
cut(iris$Sepal.Length, breaks = c(4.3, 5.4, 6.3, 7.9),
    labels = c('lo', 'med', 'hi'), right = FALSE, include.lowest = TRUE)
如果您想用分箱后的结果替换原来的值:
# Bin Sepal.Length into three labelled groups. right = FALSE together with
# include.lowest = TRUE gives [4.3,5.4) [5.4,6.3) [6.3,7.9], matching the
# discretize() output; with cut()'s defaults the minimum value 4.3 would
# turn into NA.
cuts <- cut(iris$Sepal.Length, breaks = c(4.3, 5.4, 6.3, 7.9),
            labels = c('lo', 'med', 'hi'), right = FALSE, include.lowest = TRUE)
# Overwrite the numeric column with the resulting factor.
iris$Sepal.Length <- cuts
只需将标签替换为自己的标签即可。
对于一列,您可以执行以下操作:
# Discretize the first iris column into 3 bins labelled "a", "b", "c".
discretize(iris[, 1], labels = letters[1:3], breaks = 3)
对于 data.frame,可以使用 discretizeDF,并通过 default= 参数传入这些设置:
# Discretize the iris data frame, passing the discretization settings
# (method, number of breaks, labels) through `default`.
discretizeDF(
  iris,
  default = list(breaks = 3, labels = 1:3, method = "interval")
)
这些可以在帮助页面中提供的示例中找到。