R语言 - 无法将 k-means(k 均值)算法应用于我的数据集:do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf



我试图用 fviz_nbclust 函数估计聚类的实际数量,但它一直向我显示这个错误:

do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf(arg 1)

其他警告消息:

1:In stats::dist(x):强制转换引入了 NA

2:In storage.mode(x) <- "double":强制转换引入了 NA

我对数据集的所有列都使用了sum(is.na(stand_numeric_data$variable)),它对所有变量都返回0,所以我假设我没有NA值。有什么建议吗?我是编程新手,所以任何建议都将不胜感激。

# Load the movie metadata; cells whose text is "True" are read in as NA.
movies_data <- read.csv("movies_metadata.csv", na.strings = "True")
# Pick columns by position: revenue, runtime, vote_average, vote_count, title.
only_numeric <- movies_data %>% select(16, 17, 23, 24, 21) #subset of columns
# Keep only the rows where each of the four measures is made up purely of
# digits. The backslash must be doubled inside an R string literal: '\d' on
# its own is an unrecognized escape and the script would not even parse.
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$revenue))
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$runtime))
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$vote_average))
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$vote_count))
library(caret) #standardization
# Center and scale the selected columns.
# NOTE(review): column 5 is `title`, which the summary output shows is still
# character after this step -- presumably the source of the "NAs introduced
# by coercion" warnings once it reaches kmeans; confirm by dropping it.
preproc1 <- preProcess(only_numeric[,c(1:4,5)], method=c("center", "scale"))
stand_numeric_data <- predict(preproc1, only_numeric[,c(1:4,5)])
# Count the missing values left in the standardized revenue column.
sum(is.na(stand_numeric_data$revenue))
library(factoextra) #estimate the actual number of clusters
# Elbow (within-cluster sum of squares) plot used to choose the cluster count.
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")

do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf(arg 1)

其他警告消息:

1:In stats::dist(x):强制转换引入了 NA

2:In storage.mode(x) <- "double":强制转换引入了 NA

dput(head(movies_data, 5))
structure(list(adult = c("False", "False", "False", "False", 
"False"), belongs_to_collection = c("{'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}", 
"", "{'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}", 
"", "{'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}"
), budget = c("30000000", "65000000", "0", "16000000", "0"), 
genres = c("[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]", 
"[{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]", 
"[{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]", 
"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]", 
"[{'id': 35, 'name': 'Comedy'}]"), homepage = c("http://toystory.disney.com/toy-story", 
"", "", "", ""), id = c("862", "8844", "15602", "31357", 
"11862"), imdb_id = c("tt0114709", "tt0113497", "tt0113228", 
"tt0114885", "tt0113041"), original_language = c("en", "en", 
"en", "en", "en"), original_title = c("Toy Story", "Jumanji", 
"Grumpier Old Men", "Waiting to Exhale", "Father of the Bride Part II"
), overview = c("Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.", 
"When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.", 
"A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.", 
"Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive "good man" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.", 
"Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own."
), popularity = c("21.946943", "17.015539", "11.7129", "3.859495", 
"8.387519"), poster_path = c("/rhIRbceoE9lR4veEXuwCC2wARtG.jpg", 
"/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg", "/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg", 
"/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg", "/e64sOI48hQXyru7naBFyssKFxVd.jpg"
), production_companies = c("[{'name': 'Pixar Animation Studios', 'id': 3}]", 
"[{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]", 
"[{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]", 
"[{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]", 
"[{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]"
), production_countries = c("[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
"[{'iso_3166_1': 'US', 'name': 'United States of America'}]"
), release_date = c("1995-10-30", "1995-12-15", "1995-12-22", 
"1995-12-22", "1995-02-10"), revenue = c(373554033, 262797249, 
0, 81452156, 76578911), runtime = c(81, 104, 101, 127, 106
), spoken_languages = c("[{'iso_639_1': 'en', 'name': 'English'}]", 
"[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]", 
"[{'iso_639_1': 'en', 'name': 'English'}]", "[{'iso_639_1': 'en', 'name': 'English'}]", 
"[{'iso_639_1': 'en', 'name': 'English'}]"), status = c("Released", 
"Released", "Released", "Released", "Released"), tagline = c("", 
"Roll the dice and unleash the excitement!", "Still Yelling. Still Fighting. Still Ready for Love.", 
"Friends are the people who let you be yourself... and never let you forget it.", 
"Just When His World Is Back To Normal... He's In For The Surprise Of His Life!"
), title = c("Toy Story", "Jumanji", "Grumpier Old Men", 
"Waiting to Exhale", "Father of the Bride Part II"), video = c("False", 
"False", "False", "False", "False"), vote_average = c(7.7, 
6.9, 6.5, 6.1, 5.7), vote_count = c(5415L, 2413L, 92L, 34L, 
173L)), row.names = c(NA, 5L), class = "data.frame")
summary(stand_numeric_data)
revenue           runtime          vote_average       vote_count     
Min.   :-0.1114   Min.   :-2.10206   Min.   :-1.5192   Min.   :-0.1414  
1st Qu.:-0.1114   1st Qu.:-0.20831   1st Qu.:-1.5192   1st Qu.:-0.1381  
Median :-0.1114   Median : 0.08303   Median : 0.1963   Median :-0.1381  
Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000  
3rd Qu.:-0.1114   3rd Qu.: 0.37438   3rd Qu.: 0.8825   3rd Qu.:-0.1248  
Max.   :28.9583   Max.   :20.35581   Max.   : 1.9118   Max.   :29.3968  
title          
Length:11406      
Class :character  
Mode  :character  

我可以使用 iris 数据集作为示例来重现您的错误

library(tidyverse)
library(factoextra)

# Inspect the column types, then look at the per-column summary for NAs.
str(iris)
summary(iris)

# Elbow plot on iris with the fifth column (Species) left out.
fviz_nbclust(iris[-5], kmeans, method = "wss")

# Replace every Petal.Length equal to 1.4 with NA.
df <- mutate(iris, Petal.Length = na_if(Petal.Length, 1.4))

# The summary now reports the NAs that were just introduced.
summary(df)

# Running fviz_nbclust on this data reproduces the error from the question.
fviz_nbclust(df, kmeans, method = "wss")

do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf(arg 1)。此外:警告消息:1:In stats::dist(x):强制转换引入了 NA;2:In storage.mode(x) <- "double":强制转换引入了 NA

# Keep only the rows that have no missing value in any column.
keep_rows <- complete.cases(df)
df1 <- df[keep_rows, ]

# Check the summary again: the NAs should no longer be listed.
summary(df1)

# Center and scale the first four columns.
library(caret)
measurements <- df1[, 1:4]
preproc1 <- preProcess(measurements, method = c("center", "scale"))
stand_numeric_data <- predict(preproc1, measurements)

# The elbow plot now runs without the error.
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")

在等待合适的示例数据集期间,您可以根据自己的用例调整以下代码,以确定列中非数值所在的位置。其中 a 代表数据框中的某一列

library("Hmisc")
# Example column mixing missing values, numbers and text; c() stores it as a
# character vector.
a <- c(NA, NA, 2, 3, "aa")
# Test each element separately; the result is named after the elements.
sapply(X = a, FUN = all.is.numeric)

输出:

<NA>  <NA>     2     3    aa 
FALSE FALSE  TRUE  TRUE FALSE 

您可以在此处阅读有关 all.is.numeric 函数的信息:http://math.furman.edu/~dcs/courses/math47/R/library/Hmisc/html/all.is.numeric.html

最新更新