我试图用 fviz_nbclust 函数来估计聚类的实际数量,但它一直向我显示下面这个错误:
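Error in do_one(nmeth) : NA/NaN/Inf in foreign function call (arg 1)
In addition: Warning messages:
1: In stats::dist(x) : NAs introduced by coercion
2: In storage.mode(x) <- "double" : NAs introduced by coercion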
我对数据集的每一列都使用了 sum(is.na(stand_numeric_data$variable)),所有变量的结果都是 0,所以我认为数据中没有 NA 值。有什么建议吗?我是编程新手,任何建议我都将不胜感激。
# dplyr supplies `%>%` and `select()`, which are used below.
library(dplyr)

# Read the raw metadata; cells holding the literal text "True" are read as NA.
movies_data <- read.csv("movies_metadata.csv", na.strings = "True")

# Pick columns by position. According to the dput() listing of movies_data
# these are revenue (16), runtime (17), vote_average (23), vote_count (24)
# and title (21).
only_numeric <- movies_data %>% select(16, 17, 23, 24, 21) # subset of columns

# Keep only the rows whose value consists solely of digits. The backslash has
# to be doubled inside an R string: a bare "\d" is an unrecognized escape and
# stops the script from parsing.
# NOTE(review): this pattern also rejects decimal values (e.g. a vote_average
# of 7.7) -- confirm that dropping those rows is intended.
only_numeric <- subset(only_numeric, grepl("^\\d+$", only_numeric$revenue))
only_numeric <- subset(only_numeric, grepl("^\\d+$", only_numeric$runtime))
only_numeric <- subset(only_numeric, grepl("^\\d+$", only_numeric$vote_average))
only_numeric <- subset(only_numeric, grepl("^\\d+$", only_numeric$vote_count))

library(caret) # standardization
# Centre and scale the data; all five selected columns are handed over.
preproc1 <- preProcess(only_numeric[, 1:5], method = c("center", "scale"))
stand_numeric_data <- predict(preproc1, only_numeric[, 1:5])

# Count the NA values in the standardized revenue column.
sum(is.na(stand_numeric_data$revenue))

library(factoextra) # estimate the actual number of clusters
# NOTE(review): stand_numeric_data still contains the character column `title`
# (column 5, see the summary() output). Coercing that column to numeric is
# what produces the "NAs introduced by coercion" warnings and the NA/NaN/Inf
# error reported with this call; passing only columns 1:4 avoids it.
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")
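Error in do_one(nmeth) : NA/NaN/Inf in foreign function call (arg 1)
In addition: Warning messages:
1: In stats::dist(x) : NAs introduced by coercion
2: In storage.mode(x) <- "double" : NAs introduced by coercion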
dput(head(movies_data, 5))
structure(list(adult = c("False", "False", "False", "False",
"False"), belongs_to_collection = c("{'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}",
"", "{'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}",
"", "{'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}"
), budget = c("30000000", "65000000", "0", "16000000", "0"),
genres = c("[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]",
"[{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]",
"[{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]",
"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]",
"[{'id': 35, 'name': 'Comedy'}]"), homepage = c("http://toystory.disney.com/toy-story",
"", "", "", ""), id = c("862", "8844", "15602", "31357",
"11862"), imdb_id = c("tt0114709", "tt0113497", "tt0113228",
"tt0114885", "tt0113041"), original_language = c("en", "en",
"en", "en", "en"), original_title = c("Toy Story", "Jumanji",
"Grumpier Old Men", "Waiting to Exhale", "Father of the Bride Part II"
), overview = c("Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.",
"When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.",
"A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.",
"Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive \"good man\" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.",
"Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own."
), popularity = c("21.946943", "17.015539", "11.7129", "3.859495",
"8.387519"), poster_path = c("/rhIRbceoE9lR4veEXuwCC2wARtG.jpg",
"/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg", "/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg",
"/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg", "/e64sOI48hQXyru7naBFyssKFxVd.jpg"
), production_companies = c("[{'name': 'Pixar Animation Studios', 'id': 3}]",
"[{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]",
"[{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]",
"[{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]",
"[{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]"
), production_countries = c("[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]",
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]"
), release_date = c("1995-10-30", "1995-12-15", "1995-12-22",
"1995-12-22", "1995-02-10"), revenue = c(373554033, 262797249,
0, 81452156, 76578911), runtime = c(81, 104, 101, 127, 106
), spoken_languages = c("[{'iso_639_1': 'en', 'name': 'English'}]",
"[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]",
"[{'iso_639_1': 'en', 'name': 'English'}]", "[{'iso_639_1': 'en', 'name': 'English'}]",
"[{'iso_639_1': 'en', 'name': 'English'}]"), status = c("Released",
"Released", "Released", "Released", "Released"), tagline = c("",
"Roll the dice and unleash the excitement!", "Still Yelling. Still Fighting. Still Ready for Love.",
"Friends are the people who let you be yourself... and never let you forget it.",
"Just When His World Is Back To Normal... He's In For The Surprise Of His Life!"
), title = c("Toy Story", "Jumanji", "Grumpier Old Men",
"Waiting to Exhale", "Father of the Bride Part II"), video = c("False",
"False", "False", "False", "False"), vote_average = c(7.7,
6.9, 6.5, 6.1, 5.7), vote_count = c(5415L, 2413L, 92L, 34L,
173L)), row.names = c(NA, 5L), class = "data.frame")
summary(stand_numeric_data)
revenue runtime vote_average vote_count
Min. :-0.1114 Min. :-2.10206 Min. :-1.5192 Min. :-0.1414
1st Qu.:-0.1114 1st Qu.:-0.20831 1st Qu.:-1.5192 1st Qu.:-0.1381
Median :-0.1114 Median : 0.08303 Median : 0.1963 Median :-0.1381
Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.0000
3rd Qu.:-0.1114 3rd Qu.: 0.37438 3rd Qu.: 0.8825 3rd Qu.:-0.1248
Max. :28.9583 Max. :20.35581 Max. : 1.9118 Max. :29.3968
title
Length:11406
Class :character
Mode :character
我可以使用 iris 数据集作为示例来重现您的错误:
library(tidyverse)
library(factoextra)

# Inspect the column types of the iris data
str(iris)
# Per-column summary; NA counts would be listed here if there were any
summary(iris)

# Elbow plot on iris with column 5 (Species) left out
fviz_nbclust(iris[, -5], kmeans, method = "wss")

# Replace every Petal.Length equal to 1.4 with NA
df <- mutate(iris, Petal.Length = na_if(Petal.Length, 1.4))

# The summary now reports the NAs
summary(df)

# Running fviz_nbclust on this data reproduces the error from the question
fviz_nbclust(df, kmeans, method = "wss")
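# Error in do_one(nmeth) : NA/NaN/Inf in foreign function call (arg 1)
# In addition: Warning messages:
# 1: In stats::dist(x) : NAs introduced by coercion
# 2: In storage.mode(x) <- "double" : NAs introduced by coercion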
# Keep only the rows that have no NA in any column
df1 <- df[complete.cases(df), , drop = FALSE]
# Check the summary again: the NAs are gone
summary(df1)
# Centre and scale the first four columns
library(caret)
preproc1 <- preProcess(df1[1:4], method = c("center", "scale"))
stand_numeric_data <- predict(preproc1, df1[1:4])
# fviz_nbclust now runs on the standardized data without the error
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")
在您提供合适的示例数据集之前,可以根据您的用例调整以下代码,以确定列中非数值所在的位置。其中 a 代表数据框中的某一列:
library(Hmisc)
# Example column that mixes missing values, digit strings and plain text
a <- c(NA, NA, "2", "3", "aa")
# Apply all.is.numeric to each element separately
sapply(X = a, FUN = all.is.numeric)
输出:
<NA> <NA> 2 3 aa
FALSE FALSE TRUE TRUE FALSE
您可以在此处阅读有关 all.is.numeric 函数的说明:http://math.furman.edu/~dcs/courses/math47/R/library/Hmisc/html/all.is.numeric.html