readHTMLTable 以整数形式返回数据帧值



有人能解释一下,为什么下面标记的那一行把列名设置成了数字,而不是该行中的字符串吗?另外,如果在读取表格时加上被注释掉的 colClasses 参数,我该如何得到正确的列名?

# Search-results page whose HTML tables are scraped below.
url<-'http://qpublic7.qpublic.net/ga_subdivison.php?county=ga_clarke&searchType=nbhd&numberValue=4025R&nameValue=&sectionValue=&townshipValue=&rangeValue=&startDate=01-1998&endDate=&startPrice=&endPrice=&startArea=&endArea=&startAcreage=&endAcreage=&saleQualification=All&saleVacant=All&propertyType=All&reasonType=All&start=0'
library(XML)
# Commented-out colClasses argument (12 column types) that the question asks about.
#colClasses = c("character","character","character","character","integer","integer","integer","character","character","integer","character","character"),
# Parse the HTML tables found at `url`; header=F asks for no row to be used as column labels.
data<-readHTMLTable(url,header=F,as.data.frame=T)
View(data)
# Coerce whatever readHTMLTable() returned into a single data frame.
csv<-as.data.frame(data)
# Use row 4 as the column names.
colnames(csv)<-csv[4,] #why does this line return numbers?
# Strip spaces from the column names.
colnames(csv)<-gsub(" ","",colnames(csv))
View(head(csv))
# Drop the first four rows (row 4 was used as the header above).
csv<-csv[-c(1:4),]
#####
View(csv)

你被 stringsAsFactors 坑了:各列被读成了因子(factor),所以 csv[4,] 取出的是因子,把它赋给 colnames 时得到的是因子内部的整数编码,而不是字符串。另外,你的 as.data.frame 调用也不是正确的做法(请用 str 而不是 View 来查看数据结构)。

library(XML)

# Search-results page whose HTML tables are scraped below.
URL <- "http://qpublic7.qpublic.net/ga_subdivison.php?county=ga_clarke&searchType=nbhd&numberValue=4025R&nameValue=&sectionValue=&townshipValue=&rangeValue=&startDate=01-1998&endDate=&startPrice=&endPrice=&startArea=&endArea=&startAcreage=&endAcreage=&saleQualification=All&saleVacant=All&propertyType=All&reasonType=All&start=0"

# Read the tables on the page and keep only the second one.
# stringsAsFactors = FALSE keeps the cells as character strings, so row 4
# yields the header text itself rather than factor codes.
csv <- readHTMLTable(
  URL,
  header = FALSE,
  as.data.frame = TRUE,
  stringsAsFactors = FALSE
)[[2]]

# Row 4 carries the column titles: use it as the names, then remove spaces.
names(csv) <- csv[4, ]
names(csv) <- gsub(" ", "", names(csv), fixed = TRUE)

# Discard the four leading rows (the header row and everything above it).
csv <- csv[-seq_len(4), ]
dplyr::glimpse(csv)
## Observations: 52
## Variables: 11
## $ /ParcelNumber/ (chr) "173C2 F023", "173C2 G009", "173C2 G007", "17...
## $ PropertyType       (chr) "R", "R", "R", "R", "R", "R", "R", "R", "R"...
## $ SaleDate           (chr) "12-2015", "08-2015", "08-2015", "07-2015",...
## $ SalePrice          (chr) "200,000", "265,000", "210,000", "188,000",...
## $ HeatedSqFt         (chr) "1,538", "1,756", "1,415", "1,125", "1,559"...
## $ Acres              (chr) "0.30", "0.37", "0.37", "0.38", "0.32", "0....
## $ SaleQual           (chr) "Q", "Q", "Q", "Q", "Q", "Q", "U", "Q", "Q"...
## $ Reason             (chr) "FM", "FM", "FM", "FM", "FM", "FM", "B", "F...
## $ YearBuilt          (chr) "1952", "1954", "1963", "1963", "1998", "19...
## $ LocationAddress    (chr) "155 HARDIN DR", "140 HARDIN DR", "150 HARD...
## $ Neighborhood       (chr) "4025R-RIVERDALE", "4025R-RIVERDALE", "4025...

# or use the more modern xml2 ---------------------------------------------
library(xml2)
library(rvest)

# Same page as above (`URL` is defined in the previous snippet), parsed with
# xml2/rvest instead of XML.
pg <- read_html(URL)
csv2 <- html_table(html_nodes(pg, "table")[[2]], fill=TRUE)

# Drop the two surplus trailing columns first, so that every remaining column
# receives a name below.
csv2 <- csv2[, -c(12:13)]

# Take the column titles from row 4 of csv2 itself. The earlier `csv` object
# has already lost its header rows, so csv[4,] would be a data row, and
# borrowing colnames(csv) makes this snippet depend on the previous one.
# NOTE(review): assumes row 4 is the header row here too, as in the XML
# version (both versions drop rows 1:4) -- confirm against the live page.
colnames(csv2) <- gsub(" ", "", as.character(unlist(csv2[4, ], use.names = FALSE)))

# Discard the header row and the rows above it.
csv2 <- csv2[-c(1:4), ]
dplyr::glimpse(csv2)
## Observations: 52
## Variables: 11
## $ /ParcelNumber/ (chr) "173C2 F023 ", "173C2 G009 ", "173C2 G007 ", ...
## $ PropertyType       (chr) "R ", "R ", "R ", "R ", "R ", "R ", "R ", "...
## $ SaleDate           (chr) "12-2015 ", "08-2015 ", "08-2015 ", "07-201...
## $ SalePrice          (chr) "200,000 ", "265,000 ", "210,000 ", "188,00...
## $ HeatedSqFt         (chr) "1,538 ", "1,756 ", "1,415 ", "1,125 ", "1,...
## $ Acres              (chr) "0.30 ", "0.37 ", "0.37 ", "0.38 ", "0.32 "...
## $ SaleQual           (chr) "Q ", "Q ", "Q ", "Q ", "Q ", "Q ", "U ", "...
## $ Reason             (chr) "FM ", "FM ", "FM ", "FM ", "FM ", "FM ", "...
## $ YearBuilt          (chr) "1952 ", "1954 ", "1963 ", "1963 ", "1998 "...
## $ LocationAddress    (chr) "155 HARDIN DR ", "140 HARDIN DR ", "150 HA...
## $ Neighborhood       (chr) "4025R-RIVERDALE ", "4025R-RIVERDALE ", "40...

相关内容

  • 没有找到相关文章

最新更新