我正试图使用 R 中的 Selenium，从一个 Power BI 仪表板（Dashboard）上抓取数据集并保存为 .csv 文件。
我在将数据抓取到相应的列中时遇到了问题，而且需要滚动表格以加载更多数据，才能抓取到完整的列表。
我目前正在尝试的代码可以从表中提取数据，但得到的数据集杂乱无章且不完整。我正在努力寻找一种解决方案，以便干净地抓取这个仪表板，并在我的 R 代码中加入滚动操作。我使用的代码参考自这篇文章。
这是我当前的R代码:
library(dplyr)
library(purrr)
library(readr)
library(wdman)
library(RSelenium)
library(xml2)
library(selectr)
# Scrape the currently rendered rows of the Power BI table into `df_final`.
#
# Steps: start a local Selenium server, open a Chrome session, load the
# report, parse the page source, then pull header and body cell text out of
# the `div.tableEx` node.

# Start a Selenium server on port 4444 with a pinned chromedriver version.
selServ <- selenium(
  port = 4444L,
  version = "latest",
  chromever = "105.0.5195.19"
)

# Connect a Chrome client to the server started above.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444L,
  browserName = "chrome"
)
remDr$open()

report_url <- "https://app.powerbi.com/view?r=eyJrIjoiNmY4MTQyN2YtNTMyOC00NWMyLTk0ZWUtNDA1ZTllNDZlMTE0IiwidCI6IjZiY2NiNTZkLWI1YTQtNDkzOC05MGRhLTNhNDE4ZjA0MDJjYyIsImMiOjF9&pageName=ReportSection787ec227054b3e646910"
remDr$navigate(report_url)

# Pause before reading the page source so the dashboard has time to draw the
# table; navigate() may return before client-side content is rendered.
# NOTE(review): 5 seconds is a heuristic -- tune it for your connection.
Sys.sleep(5)

# Parse the rendered HTML and keep only the table container node.
zipcode_data_table <- read_html(remDr$getPageSource()[[1]]) %>%
  querySelector("div.tableEx")

# Text of every header cell.
col_headers <- zipcode_data_table %>%
  querySelectorAll("div.columnHeaders div.pivotTableCellWrap") %>%
  map_chr(xml_text)

# For each distinct parent of a body cell, collect the text of its cells,
# then bind the resulting character vectors side by side.
zipcode_data <- zipcode_data_table %>%
  querySelectorAll("div.bodyCells div.pivotTableCellWrap") %>%
  map(xml_parent) %>%
  unique() %>%
  map(~ .x %>% querySelectorAll("div.pivotTableCellWrap") %>% map_chr(xml_text)) %>%
  bind_cols()

# Combine headers with the scraped cells and let readr guess column types.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df_final <- tibble(final = col_headers, zipcode_data) %>%
  type_convert(trim_ws = TRUE, na = c(""))
我已经能够用以下代码提取表中的行:
library(RSelenium)
library(rvest)
library(stringr)
# Scrape the Power BI table in several passes, scrolling between passes.
#
# Every 10th loop iteration the visible text of the table element is read,
# split into lines, and regrouped into one row per record (a record starts at
# a line containing a 5-9 digit ID). Each pass is stored as a character
# matrix in `list_Table`. After every iteration a double click is issued at
# the pointer position to scroll the table.

url <- "https://app.powerbi.com/view?r=eyJrIjoiNmY4MTQyN2YtNTMyOC00NWMyLTk0ZWUtNDA1ZTllNDZlMTE0IiwidCI6IjZiY2NiNTZkLWI1YTQtNDkzOC05MGRhLTNhNDE4ZjA0MDJjYyIsImMiOjF9&pageName=ReportSection787ec227054b3e646910"

# Launch a standalone Firefox Selenium container; host port 4445 is mapped to
# the container's port 4444.
shell('docker run -d -p 4445:4444 selenium/standalone-firefox')
remDr <- remoteDriver(remoteServerAddr = "localhost", port = 4445L, browserName = "firefox")
remDr$open()
remDr$navigate(url)
Sys.sleep(5)

# Park the pointer at fixed coordinates; the doubleclick() calls below act at
# the current pointer position.
# NOTE(review): presumably this is the table's scroll-down control -- the
# coordinates depend on the browser window size, so confirm them.
remDr$mouseMoveToLocation(x = 1035, y = 650)

table_xpath <- '/html/body/div[1]/report-embed/div/div/div[1]/div/div/div/exploration-container/div/docking-container/div/div/div/div/exploration-host/div/div/exploration/div/explore-canvas/div/div[2]/div/div[2]'

list_Table <- list()
counter <- 1

for (l in 0:30) {
  if (l %% 10 == 0) {
    web_Obj_Table <- remDr$findElement("xpath", table_xpath)

    # Split the element text into lines. The separator must be the newline
    # escape "\n"; a bare "n" would split on the letter n.
    vector_Info <- strsplit(web_Obj_Table$getElementText()[[1]], "\n")[[1]]

    # Drop the first 22 lines, which precede the first data record.
    vector_Info <- vector_Info[-(1:22)]

    list_Row_Table <- list()
    counter_Temp <- 1

    # seq_along() keeps the loop from running when no lines are left
    # (1:0 would iterate over c(1, 0)).
    for (i in seq_along(vector_Info)) {
      list_Row_Table[[counter_Temp]] <- ""

      # In an R string the regex \d has to be written "\\d"; "\d" is an
      # invalid escape and does not parse.
      if (stringr::str_detect(vector_Info[i], "\\d{5,9}")) {
        # A line with a 5-9 digit run starts a new record.
        list_Row_Table[[counter_Temp]] <- paste0(list_Row_Table[[counter_Temp]], ";;;", vector_Info[i])
        counter_Temp <- counter_Temp + 1
      } else if (counter_Temp > 1) {
        # Any other line is a further field of the most recent record.
        # Lines seen before the first ID are skipped instead of indexing [[0]].
        list_Row_Table[[counter_Temp - 1]] <- paste0(list_Row_Table[[counter_Temp - 1]], ";;;", vector_Info[i])
      }
    }

    # Split each ";;;"-joined record back into its fields and stack the
    # records as matrix rows. Each record string begins with ";;;", so the
    # first field of every row is "".
    list_Row_Table <- lapply(X = list_Row_Table, FUN = function(x) strsplit(x, ";;;")[[1]])
    list_Table[[counter]] <- do.call("rbind", list_Row_Table)
    counter <- counter + 1
  }

  print(l)

  # Double click at the pointer position to scroll the table.
  remDr$doubleclick()
}

list_Table
[[1]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] "" " 10100" "Meghna Knit Composite Ltd." "Gazipur" "Bangladesh" "Gold" "2023-05-03" "Apparel"
[2,] "" " 10236" "Sein Together International Philippines Inc." "Rosario" "Philippines" "Platinum" "2024-02-22" "Apparel"
[3,] "" " 10261" "Shiva Kriti Exports" "Gurgaon" "India" "Gold" "2023-09-13" "Apparel"
[4,] "" " 10322" "Multifabs Limited" "Konabari" "Bangladesh" "Gold" "2023-07-25" "Apparel"
[5,] "" " 10325" "El Nasr Clothing And Textiles Company (Kabo)" "Alexandria" "Egypt" "Gold" "2023-03-14" "Apparel"
[6,] "" " 10335" "Chi Dat Garment Co. Ltd" "Duc Hoa District" "Vietnam" "Gold" "2023-05-19" "Apparel"
[7,] "" " 10582" "Asrotex Ltd." "Gazipur" "Bangladesh" "Gold" "2022-11-24" "Apparel"
[8,] "" " 10692" "M. I. Industries" "Karachi" "Pakistan" "Gold" "2022-11-12" "Apparel"
[9,] "" " 10721" "Anhui Lujiang Xingshen Leather Co., Ltd." "Lujiang County" "China" "Gold" "2022-09-28" "Apparel"
[10,] "" " 10850" "Tosrifa Industries Ltd." "Gazipur" "Bangladesh" "Gold" "2023-01-31" "Apparel"
[11,] "" " 10856" "Best Shirts Ltd." "Joydebpur" "Bangladesh" "Gold" "2023-03-14" "Apparel"
[12,] "" " 10874" "Smart Shirts (Lanka) Limited - Pallekele" "Pallekele" "Sri Lanka" "Gold" "2023-04-28" "Apparel"
[13,] "" " 10913" "JiangYin Taidi Garments Co. Ltd" "Jiangyin" "China" "Gold" "2023-03-16" "Apparel"
[14,] "" " 11046" "P.N. Composite Ltd" "Gazipur" "Bangladesh" "Gold" "2023-02-08" "Apparel"
[15,] "" " 11128" "Weihai Cechic Textiles And Garments Co., Ltd." "Weihai" "China" "Gold" "2023-02-16" "Apparel"
[16,] "" " 11141" "Jadeluck International Co. Ltd" "Hochiminh" "Vietnam" "Gold" "2022-11-18" "Apparel"
[17,] "" " 11258" "Shengzhou Jialan Garments and Apparel Co., Ltd." "Shengzhou City" "China" "Gold" "2023-03-30" "Apparel"
[18,] "" " 11392" "Delta Textile Egypt" "Nasr City" "Egypt" "Gold" "2022-09-29" "Apparel"
[19,] "" " 11510" "Konffetty S.A. de C.V." "Apopa" "El Salvador" "Platinum" "2022-10-09" "Apparel"
[20,] "" " 11586" "Marina Apparels Ltd." "Dhaka" "Bangladesh" "Gold" "2022-11-08" "Apparel"
[,9]
[1,] ""
[2,] ""
[3,] ""
[4,] ""
[5,] ""
[6,] ""
[7,] ""
[8,] ""
[9,] ""
[10,] ""
[11,] ""
[12,] ""
[13,] ""
[14,] ""
[15,] ""
[16,] ""
[17,] ""
[18,] ""
[19,] ""
[20,] " "
[[2]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] "" " 11128" "Weihai Cechic Textiles And Garments Co., Ltd." "Weihai" "China" "Gold" "2023-02-16" "Apparel"
[2,] "" " 11141" "Jadeluck International Co. Ltd" "Hochiminh" "Vietnam" "Gold" "2022-11-18" "Apparel"
[3,] "" " 11258" "Shengzhou Jialan Garments and Apparel Co., Ltd." "Shengzhou City" "China" "Gold" "2023-03-30" "Apparel"
[4,] "" " 11392" "Delta Textile Egypt" "Nasr City" "Egypt" "Gold" "2022-09-29" "Apparel"
[5,] "" " 11510" "Konffetty S.A. de C.V." "Apopa" "El Salvador" "Platinum" "2022-10-09" "Apparel"
[6,] "" " 11586" "Marina Apparels Ltd." "Dhaka" "Bangladesh" "Gold" "2022-11-08" "Apparel"
[7,] "" " 11640" "Turag Garments & Hosiery Mills Ltd." "Gazipur" "Bangladesh" "Gold" "2023-05-13" "Apparel"
[8,] "" " 11801" "Dong Bang Vina Co . Ltd" "Ho Chi Minh" "Vietnam" "Gold" "2023-01-25" "Apparel"
[9,] "" " 11899" "Lexi (Ningbo) Garment Co. Ltd." "Ningbo City" "China" "Gold" "2023-09-09" "Apparel"
[10,] "" " 11992" "Esses Fashions Limited" "Gazipur" "Bangladesh" "Gold" "2023-07-12" "Apparel"
[11,] "" " 12055" "MA'AM Arts" "Jaipur" "India" "Platinum" "2024-04-06" "Apparel"
[12,] "" " 12258" "Taicang Longhui Garment Co. Ltd." "Taicang" "China" "Gold" "2023-07-27" "Apparel"
[13,] "" " 12343" "Umar Textiles" "Karachi" "Pakistan" "Gold" "2023-07-13" "Textile"
[14,] "" " 124197" "Minh Anh - Do Luong Garment Joint Stock Company" "Do Luong District" "Vietnam" "Gold" "2023-03-24" "Apparel"
[15,] "" " 124214" "Medline Assembly France SAS" "Châteaubriant" "France" "Gold" "2023-09-01" "Other"
[16,] "" " 125056" "Jiangsu Jiawin Garment Co., Ltd." "Suqian" "China" "Gold" "2023-08-05" "Apparel"
[17,] "" " 125516" "Cobes Industries (Bago) Co., Ltd." "BAGO" "Myanmar" "Gold" "2022-12-09" "Apparel"
[18,] "" " 125765" "PT SH Garment" "Purwakarta" "Indonesia" "Gold" "2023-09-01" "Apparel"
[19,] "" " 12663" "Valmont Fashions Ltd." "Gazipur" "Bangladesh" "Gold" "2023-01-25" "Apparel"
[20,] "" " 126964" "Van Phu Garment Joint Stock Company" "NA" "Vietnam" "Gold" "2023-09-14" "Apparel"
[,9]
[1,] ""
[2,] ""
[3,] ""
[4,] ""
[5,] ""
[6,] ""
[7,] ""
[8,] ""
[9,] ""
[10,] ""
[11,] ""
[12,] ""
[13,] ""
[14,] ""
[15,] ""
[16,] ""
[17,] ""
[18,] ""
[19,] ""
[20,] " "
[[3]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] "" " 124197" "Minh Anh - Do Luong Garment Joint Stock Company" "Do Luong District" "Vietnam" "Gold" "2023-03-24" "Apparel"
[2,] "" " 124214" "Medline Assembly France SAS" "Châteaubriant" "France" "Gold" "2023-09-01" "Other"
[3,] "" " 125056" "Jiangsu Jiawin Garment Co., Ltd." "Suqian" "China" "Gold" "2023-08-05" "Apparel"
[4,] "" " 125516" "Cobes Industries (Bago) Co., Ltd." "BAGO" "Myanmar" "Gold" "2022-12-09" "Apparel"
[5,] "" " 125765" "PT SH Garment" "Purwakarta" "Indonesia" "Gold" "2023-09-01" "Apparel"
[6,] "" " 12663" "Valmont Fashions Ltd." "Gazipur" "Bangladesh" "Gold" "2023-01-25" "Apparel"
[7,] "" " 126964" "Van Phu Garment Joint Stock Company" "NA" "Vietnam" "Gold" "2023-09-14" "Apparel"
[8,] "" " 12725" "Kayser-Roth Lumberton" "Lumberton" "USA" "Platinum" "2023-10-21" "Apparel"
[9,] "" " 127847" "Sri Senthil Balaji Tex LLP Unit 2" "Erode" "India" "Gold" "2023-07-15" "Apparel"
[10,] "" " 12814" "Kashion Industry Co. Ltd." "Ningbo" "China" "Gold" "2023-02-09" "Apparel"
[11,] "" " 128316" "Sultana Sweaters Ltd." "Mymensingh" "Bangladesh" "Gold" "2023-01-11" "Apparel"
[12,] "" " 128553" "Changzhou Dongtai Garment Co., Ltd." "Changzhou City" "China" "Gold" "2023-02-02" "Apparel"
[13,] "" " 128684" "HQ Printing Company Limited" "Nam Dinh" "Vietnam" "Gold" "2023-04-05" "Other"
[14,] "" " 128765" "A I Enterprises Pvt. Ltd." "Chennai" "India" "Gold" "2023-07-19" "Apparel"
[15,] "" " 128780" "Nanchang Hongyang Garment Co., Ltd." "Nanchang" "China" "Gold" "2023-06-01" "Apparel"
[16,] "" " 129103" "Trident Limited" "Sehore" "India" "Gold" "2023-07-20" "Apparel"
[17,] "" " 13226" "Vinh Thanh Garment Export Co. Ltd." "Huyen Cu Chi" "Vietnam" "Gold" "2023-07-29" "Apparel"
[18,] "" " 13319" "Guangdong Ruiyuan Technology Co. Ltd." "Puning" "China" "Gold" "2023-04-13" "Apparel"
[19,] "" " 13684" "Wuhu Huayang Clothing Group Co., Ltd." "wuhu" "China" "Gold" "2023-09-14" "Apparel"
[20,] "" " 13738" "Sonic Textile Industries" "Karachi" "Pakistan" "Gold" "2023-04-26" "Apparel"
[,9]
[1,] ""
[2,] ""
[3,] ""
[4,] ""
[5,] ""
[6,] ""
[7,] ""
[8,] ""
[9,] ""
[10,] ""
[11,] ""
[12,] ""
[13,] ""
[14,] ""
[15,] ""
[16,] ""
[17,] ""
[18,] ""
[19,] ""
[20,] " "
[[4]]
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] "" " 128780" "Nanchang Hongyang Garment Co., Ltd." "Nanchang" "China" "Gold" "2023-06-01"
[2,] "" " 129103" "Trident Limited" "Sehore" "India" "Gold" "2023-07-20"
[3,] "" " 13226" "Vinh Thanh Garment Export Co. Ltd." "Huyen Cu Chi" "Vietnam" "Gold" "2023-07-29"
[4,] "" " 13319" "Guangdong Ruiyuan Technology Co. Ltd." "Puning" "China" "Gold" "2023-04-13"
[5,] "" " 13684" "Wuhu Huayang Clothing Group Co., Ltd." "wuhu" "China" "Gold" "2023-09-14"
[6,] "" " 13738" "Sonic Textile Industries" "Karachi" "Pakistan" "Gold" "2023-04-26"
[7,] "" " 13812" "Ceiba Textiles S de R L" "San Pedro Sula" "Honduras" "Gold" "2023-04-18"
[8,] "" " 13831" "Wuhu Seduno Fashion Co. Ltd" "wuhu" "China" "Gold" "2023-08-26"
[9,] "" " 13836" "Velocity Jeans Egypt for Readymade Garments (ESC)" "Ismailia" "Egypt" "Gold" "2023-06-06"
[10,] "" " 13899" "Xiamen Yangli Garment Co., Ltd." "Xiamen" "China" "Gold" "2023-08-22"
[11,] "" " 13959" "Fruit of the Loom Textile - Fruit 1" "Sale" "Morocco" "Gold" "2022-12-15"
[12,] "" " 14005" "Dragoni Fashions Ltd." "Chittagong" "Bangladesh" "Gold" "2023-09-07"
[13,] "" " 14177" "Hwa Meei Optical Co. Ltd" "Tainan" "Taiwan" "Gold" "2022-12-20"
[14,] "" " 14253" "Mastrade International Garments Ltd." "Gazipur" "Bangladesh" "Gold" "2023-01-07"
[15,] "" " 14428" "Expack Corrugated Cartons PLC" "Kelaniya" "Sri Lanka" "Gold" "2023-03-04"
[16,] "" " 14453" "Nanchang New Huarui Clothing Co. Ltd." "Nanchang City" "China" "Gold" "2022-12-02"
[17,] "" " 14595" "Talisman Ltd." "Ashulia Savar Dhaka" "Bangladesh" "Gold" "2023-06-06"
[18,] "" " 14603" "Walt Technology Group Co., Ltd." "Haining" "China" "Gold" "2023-09-21"
[19,] "" " 14649" "Columbus Apparel (Cambodia) Inc." "Phnom Penh" "Cambodia" "Gold" "2023-06-02"
[20,] "" " 14699" "Lilly Billy (Thailand) Co. Ltd." "Bangkok" "Thailand" "Gold" "2023-04-11"
[,8] [,9]
[1,] "Apparel" ""
[2,] "Apparel" ""
[3,] "Apparel" ""
[4,] "Apparel" ""
[5,] "Apparel" ""
[6,] "Apparel" ""
[7,] "Apparel" ""
[8,] "Apparel" ""
[9,] "Apparel" ""
[10,] "Apparel" ""
[11,] "Apparel" ""
[12,] "Apparel" ""
[13,] "Apparel" ""
[14,] "Apparel" ""
[15,] "Apparel, Footwear" ""
[16,] "Apparel" ""
[17,] "Apparel" ""
[18,] "Apparel" ""
[19,] "Apparel" ""
[20,] "Apparel" " "