R语言 - 有谁知道如何从此图中删除"黑线"(black lines)?



我正在使用R编程语言,并在这里遵循本教程:https://michael.hahsler.net/SMU/EMIS7332/R/viz_classifier.html。

我模拟了一些数据,并根据教程绘制了结果:

library(cluster)
library(Rtsne)
library(dplyr)
library(randomForest)
library(caret)
library(ggplot2)
library(plotly)

# PART 1: Create data ----
# Simulate 10,000 rows: response_variable ~ var_1, var_2, var_3.
# All four variables are drawn independently of each other.
var_1 <- rnorm(10000, 1, 4)
var_2 <- rnorm(10000, 10, 5)
var_3 <- sample(
  LETTERS[1:4], 10000,
  replace = TRUE, prob = c(0.1, 0.2, 0.65, 0.05)
)
response_variable <- sample(
  LETTERS[1:2], 10000,
  replace = TRUE, prob = c(0.4, 0.6)
)

# Put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, response_variable)
# Declare var_3 and response_variable as factors
f$response_variable <- as.factor(f$response_variable)
f$var_3 <- as.factor(f$var_3)
# Create a row id so predictions can be traced back to rows of f
f$ID <- seq_len(nrow(f))

# PART 2: Random forest ----
# Split data into train set and test set
index <- createDataPartition(f$response_variable, p = 0.7, list = FALSE)
train <- f[index, ]
test <- f[-index, ]
# Create random forest statistical model
rf <- randomForest(
  response_variable ~ var_1 + var_2 + var_3,
  data = train, ntree = 20, mtry = 2
)
# Have the model predict class probabilities for the test set
pred <- predict(rf, test, type = "prob")
# The probability matrix has one column per class ("A", "B"); the second
# column is P("B"), so a value above 0.5 must be labelled "B" (the original
# code labelled it "A", inverting every prediction). Fixing the levels keeps
# the predicted factor comparable with the reference in confusionMatrix().
labels <- factor(
  ifelse(pred[, "B"] > 0.5, "B", "A"),
  levels = levels(test$response_variable)
)
confusionMatrix(labels, test$response_variable)

# PART 3: Visualize in 2D ----
# (source: https://dpmartin42.github.io/posts/r/cluster-mixed-types)
# Gower distance on the three predictors only (response and ID are excluded)
gower_dist <- daisy(test[, c("var_1", "var_2", "var_3")], metric = "gower")
gower_mat <- as.matrix(gower_dist)
labels <- data.frame(labels)
labels$ID <- test$ID

tsne_obj <- Rtsne(gower_dist, is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(
    cluster = factor(labels$labels),
    name = labels$ID
  )
# Map colour to the column inside tsne_data instead of an external vector
plot <- ggplot(aes(x = X, y = Y), data = tsne_data) +
  geom_point(aes(color = cluster))
plotly_plot <- ggplotly(plot)

# Two t-SNE coordinates plus the predicted class, used to fit the 2-D model
a <- tsne_obj$Y
a <- data.frame(a)
data <- a
# Drop a class that was never predicted so the downstream classifier does
# not receive an empty factor level
data$class <- droplevels(labels$labels)

#' Plot a classifier's decision regions over two-dimensional data
#'
#' Draws the first two columns of `data` as a scatter plot, evaluates `model`
#' on a regular `resolution` x `resolution` grid spanning the range of those
#' two columns, and optionally overlays the predicted class of every grid
#' point (as dots) and the boundaries between predicted classes (as contour
#' lines, i.e. the "black lines").
#'
#' @param model A fitted model for which `predict(model, newdata, type = )`
#'   returns class labels, or a list with a `class` element.
#' @param data A data frame whose first two columns are the plotted
#'   coordinates.
#' @param class Name (or index) of the column of `data` holding the class of
#'   each observation; `NULL` plots all points with one colour and symbol.
#' @param predict_type Value passed as `type` to `predict()`.
#' @param resolution Number of grid points along each axis.
#' @param showgrid If `TRUE`, draw the predicted class at every grid point.
#' @param ... Further arguments passed on to `plot()`.
#' @param showcontour If `TRUE` (default), draw the class boundaries with
#'   `contour()`; set to `FALSE` to omit the black boundary lines.
#' @return Invisibly, an integer matrix of predicted class codes with one row
#'   per x grid value and one column per y grid value.
decisionplot <- function(model, data, class = NULL, predict_type = "class",
                         resolution = 100, showgrid = TRUE, ...,
                         showcontour = TRUE) {
  cl <- if (is.null(class)) 1 else data[[class]]
  data <- data[, 1:2]

  plot(data, col = as.integer(cl) + 1L, pch = as.integer(cl) + 1L, ...)

  # make grid
  r <- vapply(data, range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)
  # x varies slowest, y fastest, so filling z by row gives z[i, j] = (xs[i], ys[j])
  g <- cbind(rep(xs, each = resolution), rep(ys, times = resolution))
  colnames(g) <- colnames(r)
  g <- as.data.frame(g)

  ### guess how to get class labels from predict
  ### (unfortunately not very consistent between models)
  p <- predict(model, g, type = predict_type)
  if (is.list(p)) p <- p$class
  p <- as.factor(p)

  if (showgrid) points(g, col = as.integer(p) + 1L, pch = ".")

  z <- matrix(as.integer(p), nrow = resolution, byrow = TRUE)

  # Boundaries sit halfway between consecutive integer class codes. The number
  # of codes comes from the predictions that z encodes, and a single predicted
  # class has no boundary to draw (1:(k - 1) would yield c(1, 0) for k == 1).
  k <- nlevels(p)
  if (showcontour && k > 1L) {
    contour(xs, ys, z,
      add = TRUE, drawlabels = FALSE,
      lwd = 2, levels = seq_len(k - 1L) + 0.5
    )
  }

  invisible(z)
}

# Fit a random forest on the two t-SNE coordinates and plot its decision
# regions. The argument is `ntree`; the original `ntrees` is not a
# randomForest argument and was silently swallowed by `...`.
model <- randomForest(class ~ ., data = data, mtry = 2, ntree = 500)
final_plot <- decisionplot(model, data, class = "class", main = "rf (1)")

现在,我正试图从这张图中删除"黑线"。简而言之,当图像还在加载(缓冲)时,没有黑线;但随后黑线就出现了。有人知道如何去掉这些线吗?

我最接近的一次尝试是:

plot(data[,c(1:2)], col = data[,3])

但我希望保持图的格式不变(就像黑线出现时那样)。

感谢

如评论中所说:(删除 contour 语句)

library(cluster)
library(Rtsne)
library(dplyr)
library(randomForest)
library(caret)
library(ggplot2)
library(plotly)

# PART 1: Create data ----
# Simulate 10,000 rows: response_variable ~ var_1, var_2, var_3.
# All four variables are drawn independently of each other.
var_1 <- rnorm(10000, 1, 4)
var_2 <- rnorm(10000, 10, 5)
var_3 <- sample(
  LETTERS[1:4], 10000,
  replace = TRUE, prob = c(0.1, 0.2, 0.65, 0.05)
)
response_variable <- sample(
  LETTERS[1:2], 10000,
  replace = TRUE, prob = c(0.4, 0.6)
)

# Put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, response_variable)
# Declare var_3 and response_variable as factors
f$response_variable <- as.factor(f$response_variable)
f$var_3 <- as.factor(f$var_3)
# Create a row id so predictions can be traced back to rows of f
f$ID <- seq_len(nrow(f))

# PART 2: Random forest ----
# Split data into train set and test set
index <- createDataPartition(f$response_variable, p = 0.7, list = FALSE)
train <- f[index, ]
test <- f[-index, ]
# Create random forest statistical model
rf <- randomForest(
  response_variable ~ var_1 + var_2 + var_3,
  data = train, ntree = 20, mtry = 2
)
# Have the model predict class probabilities for the test set
pred <- predict(rf, test, type = "prob")
# The probability matrix has one column per class ("A", "B"); the second
# column is P("B"), so a value above 0.5 must be labelled "B" (the original
# code labelled it "A", inverting every prediction). Fixing the levels keeps
# the predicted factor comparable with the reference in confusionMatrix().
labels <- factor(
  ifelse(pred[, "B"] > 0.5, "B", "A"),
  levels = levels(test$response_variable)
)
confusionMatrix(labels, test$response_variable)

# PART 3: Visualize in 2D ----
# (source: https://dpmartin42.github.io/posts/r/cluster-mixed-types)
# Gower distance on the three predictors only (response and ID are excluded)
gower_dist <- daisy(test[, c("var_1", "var_2", "var_3")], metric = "gower")
gower_mat <- as.matrix(gower_dist)
labels <- data.frame(labels)
labels$ID <- test$ID

tsne_obj <- Rtsne(gower_dist, is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(
    cluster = factor(labels$labels),
    name = labels$ID
  )
# Map colour to the column inside tsne_data instead of an external vector
plot <- ggplot(aes(x = X, y = Y), data = tsne_data) +
  geom_point(aes(color = cluster))
plotly_plot <- ggplotly(plot)

# Two t-SNE coordinates plus the predicted class, used to fit the 2-D model
a <- tsne_obj$Y
a <- data.frame(a)
data <- a
# Drop a class that was never predicted so the downstream classifier does
# not receive an empty factor level
data$class <- droplevels(labels$labels)

#' Plot a classifier's decision regions without boundary lines
#'
#' Draws the first two columns of `data` as a scatter plot, evaluates `model`
#' on a regular `resolution` x `resolution` grid spanning the range of those
#' two columns, and optionally overlays the predicted class of every grid
#' point as a dot. No contour lines are drawn, so the plot has no black
#' class-boundary lines.
#'
#' @param model A fitted model for which `predict(model, newdata, type = )`
#'   returns class labels, or a list with a `class` element.
#' @param data A data frame whose first two columns are the plotted
#'   coordinates.
#' @param class Name (or index) of the column of `data` holding the class of
#'   each observation; `NULL` plots all points with one colour and symbol.
#' @param predict_type Value passed as `type` to `predict()`.
#' @param resolution Number of grid points along each axis.
#' @param showgrid If `TRUE`, draw the predicted class at every grid point.
#' @param ... Further arguments passed on to `plot()`.
#' @return Invisibly, an integer matrix of predicted class codes with one row
#'   per x grid value and one column per y grid value.
decisionplot <- function(model, data, class = NULL, predict_type = "class",
                         resolution = 100, showgrid = TRUE, ...) {
  cl <- if (is.null(class)) 1 else data[[class]]
  data <- data[, 1:2]

  plot(data, col = as.integer(cl) + 1L, pch = as.integer(cl) + 1L, ...)

  # make grid
  r <- vapply(data, range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)
  # x varies slowest, y fastest, so filling z by row gives z[i, j] = (xs[i], ys[j])
  g <- cbind(rep(xs, each = resolution), rep(ys, times = resolution))
  colnames(g) <- colnames(r)
  g <- as.data.frame(g)

  ### guess how to get class labels from predict
  ### (unfortunately not very consistent between models)
  p <- predict(model, g, type = predict_type)
  if (is.list(p)) p <- p$class
  p <- as.factor(p)

  if (showgrid) points(g, col = as.integer(p) + 1L, pch = ".")

  # The contour() call is intentionally omitted: it is what drew the black lines
  z <- matrix(as.integer(p), nrow = resolution, byrow = TRUE)

  invisible(z)
}

# Fit a random forest on the two t-SNE coordinates and plot its decision
# regions. The argument is `ntree`; the original `ntrees` is not a
# randomForest argument and was silently swallowed by `...`.
model <- randomForest(class ~ ., data = data, mtry = 2, ntree = 500)
aaaa <- decisionplot(model, data, class = "class", main = "rf (1)")

最新更新