在 R 中抓取 Zillow,并使用 SelectorGadget 查找纬度和经度



我正在尝试在 R 中使用 rvest 和 dplyr 包,并借助 SelectorGadget 工具,从 Zillow 的房源页面抓取纬度和经度。

我想找到每条房源的纬度和经度,并将其存入我用以下代码创建的数据框(data frame)中。下面是我目前的代码。有人能帮忙吗?

# Scrape one page of Zillow search results for Arlington, VA and assemble a
# data frame with one row per listing card.
library(rvest)  # read_html(), html_nodes(), html_text(), %>%
library(dplyr)  # mutate()

link <- "https://www.zillow.com/arlington-va/2_p/?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A2%7D%2C%22usersSearchTerm%22%3A%22arlington%2C%20virginia%22%2C%22mapBounds%22%3A%7B%22west%22%3A-77.46492611914063%2C%22east%22%3A-76.73708188085938%2C%22south%22%3A38.64364888623124%2C%22north%22%3A39.117234332841704%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A30258%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%7D%2C%22isListVisible%22%3Atrue%7D"

page <- read_html(link)

# `address` was used below but never extracted (the `bed` line was duplicated
# in its place), which made data.frame() fail with "object 'address' not found".
# NOTE(review): `.list-card-addr` is assumed to be the address element, by
# analogy with the other `.list-card-*` selectors -- confirm against the page.
address <- page %>% html_nodes(".list-card-addr") %>% html_text()
bed <- page %>% html_nodes(".list-card-details li:nth-child(1)") %>% html_text()
bath <- page %>% html_nodes(".list-card-details li:nth-child(2)") %>% html_text()
sqfoot <- page %>% html_nodes(".list-card-details li:nth-child(3)") %>% html_text()
price <- page %>% html_nodes(".list-card-price") %>% html_text()
marketime <- page %>% html_nodes(".list-card-variable-text") %>% html_text()

# data.frame() requires all vectors to have the same length, i.e. every card
# must expose every field.
houses <- data.frame(address, bed, bath, sqfoot, price, marketime) %>%
  mutate(
    # Only the first character is kept, so counts of 10 or more are truncated.
    bed = as.numeric(substring(bed, 1, 1)),
    # Left as character, as in the original script.
    bath = substring(bath, 1, 1),
    # Drop thousands separators and the " sqft" suffix before converting.
    sqfoot = as.numeric(gsub(" sqft", "", gsub(",", "", sqfoot))),
    # Drop thousands separators and the leading currency symbol.
    price = as.numeric(substring(gsub(",", "", price), 2))
  )

您可以从页面上的 script 标签中提取所有房源信息(不过我认为 Zillow 提供了 API,那会是更好的数据来源)。

library(rvest)
library(purrr)
# Pull listing name and coordinates out of the JSON embedded in the <script>
# tags under `.photo-cards`, producing one data-frame row per listing.
page <- read_html('https://www.zillow.com/arlington-va/2_p/?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A2%7D%2C%22usersSearchTerm%22%3A%22arlington%2C%20virginia%22%2C%22mapBounds%22%3A%7B%22west%22%3A-77.64070736914063%2C%22east%22%3A-76.56130063085938%2C%22south%22%3A38.56616517053261%2C%22north%22%3A39.19411978197601%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A30258%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Afalse%2C%22filterState%22%3A%7B%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%7D%2C%22isListVisible%22%3Atrue%7D')

# Raw text of every script node inside the photo-card list.
data <- page %>%
  html_nodes('.photo-cards script') %>%
  html_text()

# Parse each script body as JSON (TRUE spelled out; `T` can be reassigned).
info <- map(data, ~ jsonlite::parse_json(., simplifyVector = TRUE))

# Keep only the entries that carry a `geo` element (latitude/longitude).
mask <- map_lgl(info, ~ 'geo' %in% names(.))
info <- info[mask]

df <- map_df(info, ~ {
  data.frame(
    Name = .$name,
    # Must be `=`: `Latitude <- ...` passes an unnamed argument, so the column
    # would get a mangled deparsed name instead of "Latitude".
    Latitude = .$geo$latitude,
    Longitude = .$geo$longitude,
    stringsAsFactors = FALSE
  )
})

最新更新