使用R抓取Power BI仪表板的数据



我正试图在R中使用Selenium,把Power BI仪表板上的一个数据集导出为.csv文件。

我在把数据抓取到对应的列时遇到了问题,而且需要滚动页面加载更多数据,才能抓取到完整的列表。

我目前尝试的代码可以从表中提取数据,但得到的数据集杂乱无章且不完整。我正在努力寻找一种能够干净地抓取这个仪表板的方案,并希望在我的R代码中加入滚动功能。我使用的代码参考自这篇文章。

这是我当前的R代码:

library(dplyr)
library(purrr)
library(readr)
library(wdman)
library(RSelenium)
library(xml2)
library(selectr)

# Scrape the table visual of a public Power BI report into a tibble.
#
# Steps: start a local Selenium server, attach a Chrome session, load the
# report, parse the rendered HTML and pull header and body cells out of the
# `div.tableEx` element.

# Start a Selenium server on port 4444 with a pinned chromedriver version.
selServ <- selenium(
  port = 4444L,
  version = "latest",
  chromever = "105.0.5195.19"
)

# Attach a Chrome session to that server.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4444L,
  browserName = "chrome"
)
remDr$open()

report_url <- "https://app.powerbi.com/view?r=eyJrIjoiNmY4MTQyN2YtNTMyOC00NWMyLTk0ZWUtNDA1ZTllNDZlMTE0IiwidCI6IjZiY2NiNTZkLWI1YTQtNDkzOC05MGRhLTNhNDE4ZjA0MDJjYyIsImMiOjF9&pageName=ReportSection787ec227054b3e646910"
remDr$navigate(report_url)

# Pause before reading the page source; the report presumably renders its
# table asynchronously after navigation returns.
# TODO: replace the fixed sleep with an explicit wait for `div.tableEx`.
Sys.sleep(5)

zipcode_data_table <- read_html(remDr$getPageSource()[[1]]) %>%
  querySelector("div.tableEx")

# Fail with a clear message instead of an obscure error further down the pipe.
if (is.null(zipcode_data_table)) {
  stop("No `div.tableEx` element found in the page source.", call. = FALSE)
}

# Text of every header cell.
col_headers <- zipcode_data_table %>%
  querySelectorAll("div.columnHeaders div.pivotTableCellWrap") %>%
  map_chr(xml_text)

# Group body cells by their parent node, then bind the groups side by side.
# Only cells present in the page source at this moment are captured; nothing
# here scrolls the table.
zipcode_data <- zipcode_data_table %>%
  querySelectorAll("div.bodyCells div.pivotTableCellWrap") %>%
  map(xml_parent) %>%
  unique() %>%
  map(
    ~ .x %>%
      querySelectorAll("div.pivotTableCellWrap") %>%
      map_chr(xml_text)
  ) %>%
  bind_cols()

# NOTE(review): this places the header texts in a column called `final` next
# to the data rather than using them as column names, and it requires
# length(col_headers) to be compatible with nrow(zipcode_data) -- confirm
# whether names(zipcode_data) <- col_headers was intended.
df_final <- tibble(final = col_headers, zipcode_data) %>%
  type_convert(trim_ws = TRUE, na = c(""))

我已经能够用以下代码提取表中的行:

library(RSelenium)
library(rvest)
library(stringr)
# Scrape the Power BI table in several snapshots while scrolling it.
#
# The visible text of the table visual is read every 10th iteration, split
# into lines, and regrouped into rows: a line containing a 5-9 digit run
# starts a new row, every other line is appended to the current row.
# Result: `list_Table`, a list of character matrices (one per snapshot).
# Consecutive snapshots overlap, so rows must be de-duplicated afterwards.

url <- "https://app.powerbi.com/view?r=eyJrIjoiNmY4MTQyN2YtNTMyOC00NWMyLTk0ZWUtNDA1ZTllNDZlMTE0IiwidCI6IjZiY2NiNTZkLWI1YTQtNDkzOC05MGRhLTNhNDE4ZjA0MDJjYyIsImMiOjF9&pageName=ReportSection787ec227054b3e646910"

# Launch a standalone Firefox Selenium container (host port 4445 -> 4444).
# NOTE: `shell()` is only available in R on Windows.
shell('docker run -d -p 4445:4444 selenium/standalone-firefox')

remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4445L,
  browserName = "firefox"
)
remDr$open()
remDr$navigate(url)
Sys.sleep(5)

# Park the pointer at a fixed screen position; the double-clicks below are
# sent there (presumably the table's scroll-down control -- depends on the
# browser window size, verify before relying on it).
remDr$mouseMoveToLocation(x = 1035, y = 650)

list_Table <- list()
counter <- 1

for (l in 0:30) {
  # Take a snapshot of the table text every 10 iterations (l = 0, 10, 20, 30).
  if (l %% 10 == 0) {
    web_Obj_Table <- remDr$findElement("xpath", '/html/body/div[1]/report-embed/div/div/div[1]/div/div/div/exploration-container/div/docking-container/div/div/div/div/exploration-host/div/div/exploration/div/explore-canvas/div/div[2]/div/div[2]')

    # One element per rendered text line. The separator must be the newline
    # escape "\n"; a bare "n" would cut words such as "Bangladesh" apart.
    vector_Info <- strsplit(web_Obj_Table$getElementText()[[1]], "\n")[[1]]

    # Drop the first 22 lines, which precede the first data row.
    vector_Info <- vector_Info[-(1:22)]
    nb_Item <- length(vector_Info)
    list_Row_Table <- list()
    counter_Temp <- 1

    # seq_len() keeps the loop from running when no lines are left.
    for (i in seq_len(nb_Item)) {
      list_Row_Table[[counter_Temp]] <- ""

      # The backslash has to be doubled inside an R string literal.
      if (stringr::str_detect(vector_Info[i], "\\d{5,9}")) {
        # An id line opens a new row.
        list_Row_Table[[counter_Temp]] <- paste0(
          list_Row_Table[[counter_Temp]], ";;;", vector_Info[i]
        )
        counter_Temp <- counter_Temp + 1
      } else if (counter_Temp > 1) {
        # Any other line is a further field of the row opened last.
        # Lines seen before the first id are skipped (there is no row yet).
        list_Row_Table[[counter_Temp - 1]] <- paste0(
          list_Row_Table[[counter_Temp - 1]], ";;;", vector_Info[i]
        )
      }
    }

    # Split every row back into its fields and stack the rows into a matrix.
    list_Row_Table <- lapply(
      X = list_Row_Table,
      FUN = function(x) strsplit(x, ";;;")[[1]]
    )
    list_Table[[counter]] <- do.call("rbind", list_Row_Table)
    counter <- counter + 1
  }

  print(l)
  # Double-click at the parked pointer position to advance the table.
  remDr$doubleclick()
}

list_Table
[[1]]
[,1] [,2]     [,3]                                              [,4]               [,5]          [,6]       [,7]         [,8]     
[1,] ""   " 10100" "Meghna Knit Composite Ltd."                      "Gazipur"          "Bangladesh"  "Gold"     "2023-05-03" "Apparel"
[2,] ""   " 10236" "Sein Together International Philippines Inc."    "Rosario"          "Philippines" "Platinum" "2024-02-22" "Apparel"
[3,] ""   " 10261" "Shiva Kriti Exports"                             "Gurgaon"          "India"       "Gold"     "2023-09-13" "Apparel"
[4,] ""   " 10322" "Multifabs Limited"                               "Konabari"         "Bangladesh"  "Gold"     "2023-07-25" "Apparel"
[5,] ""   " 10325" "El Nasr Clothing And Textiles Company (Kabo)"    "Alexandria"       "Egypt"       "Gold"     "2023-03-14" "Apparel"
[6,] ""   " 10335" "Chi Dat Garment Co. Ltd"                         "Duc Hoa District" "Vietnam"     "Gold"     "2023-05-19" "Apparel"
[7,] ""   " 10582" "Asrotex Ltd."                                    "Gazipur"          "Bangladesh"  "Gold"     "2022-11-24" "Apparel"
[8,] ""   " 10692" "M. I. Industries"                                "Karachi"          "Pakistan"    "Gold"     "2022-11-12" "Apparel"
[9,] ""   " 10721" "Anhui Lujiang Xingshen Leather Co., Ltd."        "Lujiang County"   "China"       "Gold"     "2022-09-28" "Apparel"
[10,] ""   " 10850" "Tosrifa Industries Ltd."                         "Gazipur"          "Bangladesh"  "Gold"     "2023-01-31" "Apparel"
[11,] ""   " 10856" "Best Shirts Ltd."                                "Joydebpur"        "Bangladesh"  "Gold"     "2023-03-14" "Apparel"
[12,] ""   " 10874" "Smart Shirts (Lanka) Limited - Pallekele"        "Pallekele"        "Sri Lanka"   "Gold"     "2023-04-28" "Apparel"
[13,] ""   " 10913" "JiangYin Taidi Garments Co. Ltd"                 "Jiangyin"         "China"       "Gold"     "2023-03-16" "Apparel"
[14,] ""   " 11046" "P.N. Composite Ltd"                              "Gazipur"          "Bangladesh"  "Gold"     "2023-02-08" "Apparel"
[15,] ""   " 11128" "Weihai Cechic Textiles And Garments Co., Ltd."   "Weihai"           "China"       "Gold"     "2023-02-16" "Apparel"
[16,] ""   " 11141" "Jadeluck International Co. Ltd"                  "Hochiminh"        "Vietnam"     "Gold"     "2022-11-18" "Apparel"
[17,] ""   " 11258" "Shengzhou Jialan Garments and Apparel Co., Ltd." "Shengzhou City"   "China"       "Gold"     "2023-03-30" "Apparel"
[18,] ""   " 11392" "Delta Textile Egypt"                             "Nasr City"        "Egypt"       "Gold"     "2022-09-29" "Apparel"
[19,] ""   " 11510" "Konffetty S.A. de C.V."                          "Apopa"            "El Salvador" "Platinum" "2022-10-09" "Apparel"
[20,] ""   " 11586" "Marina Apparels Ltd."                            "Dhaka"            "Bangladesh"  "Gold"     "2022-11-08" "Apparel"
[,9]
[1,] ""  
[2,] ""  
[3,] ""  
[4,] ""  
[5,] ""  
[6,] ""  
[7,] ""  
[8,] ""  
[9,] ""  
[10,] ""  
[11,] ""  
[12,] ""  
[13,] ""  
[14,] ""  
[15,] ""  
[16,] ""  
[17,] ""  
[18,] ""  
[19,] ""  
[20,] " " 
[[2]]
[,1] [,2]      [,3]                                              [,4]                [,5]          [,6]       [,7]         [,8]     
[1,] ""   " 11128"  "Weihai Cechic Textiles And Garments Co., Ltd."   "Weihai"            "China"       "Gold"     "2023-02-16" "Apparel"
[2,] ""   " 11141"  "Jadeluck International Co. Ltd"                  "Hochiminh"         "Vietnam"     "Gold"     "2022-11-18" "Apparel"
[3,] ""   " 11258"  "Shengzhou Jialan Garments and Apparel Co., Ltd." "Shengzhou City"    "China"       "Gold"     "2023-03-30" "Apparel"
[4,] ""   " 11392"  "Delta Textile Egypt"                             "Nasr City"         "Egypt"       "Gold"     "2022-09-29" "Apparel"
[5,] ""   " 11510"  "Konffetty S.A. de C.V."                          "Apopa"             "El Salvador" "Platinum" "2022-10-09" "Apparel"
[6,] ""   " 11586"  "Marina Apparels Ltd."                            "Dhaka"             "Bangladesh"  "Gold"     "2022-11-08" "Apparel"
[7,] ""   " 11640"  "Turag Garments & Hosiery Mills Ltd."             "Gazipur"           "Bangladesh"  "Gold"     "2023-05-13" "Apparel"
[8,] ""   " 11801"  "Dong Bang Vina Co . Ltd"                         "Ho Chi Minh"       "Vietnam"     "Gold"     "2023-01-25" "Apparel"
[9,] ""   " 11899"  "Lexi (Ningbo) Garment Co. Ltd."                  "Ningbo City"       "China"       "Gold"     "2023-09-09" "Apparel"
[10,] ""   " 11992"  "Esses Fashions Limited"                          "Gazipur"           "Bangladesh"  "Gold"     "2023-07-12" "Apparel"
[11,] ""   " 12055"  "MA'AM Arts"                                      "Jaipur"            "India"       "Platinum" "2024-04-06" "Apparel"
[12,] ""   " 12258"  "Taicang Longhui Garment Co. Ltd."                "Taicang"           "China"       "Gold"     "2023-07-27" "Apparel"
[13,] ""   " 12343"  "Umar Textiles"                                   "Karachi"           "Pakistan"    "Gold"     "2023-07-13" "Textile"
[14,] ""   " 124197" "Minh Anh - Do Luong Garment Joint Stock Company" "Do Luong District" "Vietnam"     "Gold"     "2023-03-24" "Apparel"
[15,] ""   " 124214" "Medline Assembly France SAS"                     "Châteaubriant"     "France"      "Gold"     "2023-09-01" "Other"  
[16,] ""   " 125056" "Jiangsu Jiawin Garment Co., Ltd."                "Suqian"            "China"       "Gold"     "2023-08-05" "Apparel"
[17,] ""   " 125516" "Cobes Industries (Bago) Co., Ltd."               "BAGO"              "Myanmar"     "Gold"     "2022-12-09" "Apparel"
[18,] ""   " 125765" "PT SH Garment"                                   "Purwakarta"        "Indonesia"   "Gold"     "2023-09-01" "Apparel"
[19,] ""   " 12663"  "Valmont Fashions Ltd."                           "Gazipur"           "Bangladesh"  "Gold"     "2023-01-25" "Apparel"
[20,] ""   " 126964" "Van Phu Garment Joint Stock Company"             "NA"                "Vietnam"     "Gold"     "2023-09-14" "Apparel"
[,9]
[1,] ""  
[2,] ""  
[3,] ""  
[4,] ""  
[5,] ""  
[6,] ""  
[7,] ""  
[8,] ""  
[9,] ""  
[10,] ""  
[11,] ""  
[12,] ""  
[13,] ""  
[14,] ""  
[15,] ""  
[16,] ""  
[17,] ""  
[18,] ""  
[19,] ""  
[20,] " " 
[[3]]
[,1] [,2]      [,3]                                              [,4]                [,5]         [,6]       [,7]         [,8]     
[1,] ""   " 124197" "Minh Anh - Do Luong Garment Joint Stock Company" "Do Luong District" "Vietnam"    "Gold"     "2023-03-24" "Apparel"
[2,] ""   " 124214" "Medline Assembly France SAS"                     "Châteaubriant"     "France"     "Gold"     "2023-09-01" "Other"  
[3,] ""   " 125056" "Jiangsu Jiawin Garment Co., Ltd."                "Suqian"            "China"      "Gold"     "2023-08-05" "Apparel"
[4,] ""   " 125516" "Cobes Industries (Bago) Co., Ltd."               "BAGO"              "Myanmar"    "Gold"     "2022-12-09" "Apparel"
[5,] ""   " 125765" "PT SH Garment"                                   "Purwakarta"        "Indonesia"  "Gold"     "2023-09-01" "Apparel"
[6,] ""   " 12663"  "Valmont Fashions Ltd."                           "Gazipur"           "Bangladesh" "Gold"     "2023-01-25" "Apparel"
[7,] ""   " 126964" "Van Phu Garment Joint Stock Company"             "NA"                "Vietnam"    "Gold"     "2023-09-14" "Apparel"
[8,] ""   " 12725"  "Kayser-Roth Lumberton"                           "Lumberton"         "USA"        "Platinum" "2023-10-21" "Apparel"
[9,] ""   " 127847" "Sri Senthil Balaji Tex LLP Unit 2"               "Erode"             "India"      "Gold"     "2023-07-15" "Apparel"
[10,] ""   " 12814"  "Kashion Industry Co. Ltd."                       "Ningbo"            "China"      "Gold"     "2023-02-09" "Apparel"
[11,] ""   " 128316" "Sultana Sweaters Ltd."                           "Mymensingh"        "Bangladesh" "Gold"     "2023-01-11" "Apparel"
[12,] ""   " 128553" "Changzhou Dongtai Garment Co., Ltd."             "Changzhou City"    "China"      "Gold"     "2023-02-02" "Apparel"
[13,] ""   " 128684" "HQ Printing Company Limited"                     "Nam Dinh"          "Vietnam"    "Gold"     "2023-04-05" "Other"  
[14,] ""   " 128765" "A I Enterprises Pvt. Ltd."                       "Chennai"           "India"      "Gold"     "2023-07-19" "Apparel"
[15,] ""   " 128780" "Nanchang Hongyang Garment Co., Ltd."             "Nanchang"          "China"      "Gold"     "2023-06-01" "Apparel"
[16,] ""   " 129103" "Trident Limited"                                 "Sehore"            "India"      "Gold"     "2023-07-20" "Apparel"
[17,] ""   " 13226"  "Vinh Thanh Garment Export Co. Ltd."              "Huyen Cu Chi"      "Vietnam"    "Gold"     "2023-07-29" "Apparel"
[18,] ""   " 13319"  "Guangdong Ruiyuan Technology Co. Ltd."           "Puning"            "China"      "Gold"     "2023-04-13" "Apparel"
[19,] ""   " 13684"  "Wuhu Huayang Clothing Group Co., Ltd."           "wuhu"              "China"      "Gold"     "2023-09-14" "Apparel"
[20,] ""   " 13738"  "Sonic Textile Industries"                        "Karachi"           "Pakistan"   "Gold"     "2023-04-26" "Apparel"
[,9]
[1,] ""  
[2,] ""  
[3,] ""  
[4,] ""  
[5,] ""  
[6,] ""  
[7,] ""  
[8,] ""  
[9,] ""  
[10,] ""  
[11,] ""  
[12,] ""  
[13,] ""  
[14,] ""  
[15,] ""  
[16,] ""  
[17,] ""  
[18,] ""  
[19,] ""  
[20,] " " 
[[4]]
[,1] [,2]      [,3]                                                [,4]                  [,5]         [,6]   [,7]        
[1,] ""   " 128780" "Nanchang Hongyang Garment Co., Ltd."               "Nanchang"            "China"      "Gold" "2023-06-01"
[2,] ""   " 129103" "Trident Limited"                                   "Sehore"              "India"      "Gold" "2023-07-20"
[3,] ""   " 13226"  "Vinh Thanh Garment Export Co. Ltd."                "Huyen Cu Chi"        "Vietnam"    "Gold" "2023-07-29"
[4,] ""   " 13319"  "Guangdong Ruiyuan Technology Co. Ltd."             "Puning"              "China"      "Gold" "2023-04-13"
[5,] ""   " 13684"  "Wuhu Huayang Clothing Group Co., Ltd."             "wuhu"                "China"      "Gold" "2023-09-14"
[6,] ""   " 13738"  "Sonic Textile Industries"                          "Karachi"             "Pakistan"   "Gold" "2023-04-26"
[7,] ""   " 13812"  "Ceiba Textiles S de R L"                           "San Pedro Sula"      "Honduras"   "Gold" "2023-04-18"
[8,] ""   " 13831"  "Wuhu Seduno Fashion Co. Ltd"                       "wuhu"                "China"      "Gold" "2023-08-26"
[9,] ""   " 13836"  "Velocity Jeans Egypt for Readymade Garments (ESC)" "Ismailia"            "Egypt"      "Gold" "2023-06-06"
[10,] ""   " 13899"  "Xiamen Yangli Garment Co., Ltd."                   "Xiamen"              "China"      "Gold" "2023-08-22"
[11,] ""   " 13959"  "Fruit of the Loom Textile - Fruit 1"               "Sale"                "Morocco"    "Gold" "2022-12-15"
[12,] ""   " 14005"  "Dragoni Fashions Ltd."                             "Chittagong"          "Bangladesh" "Gold" "2023-09-07"
[13,] ""   " 14177"  "Hwa Meei Optical Co. Ltd"                          "Tainan"              "Taiwan"     "Gold" "2022-12-20"
[14,] ""   " 14253"  "Mastrade International Garments Ltd."              "Gazipur"             "Bangladesh" "Gold" "2023-01-07"
[15,] ""   " 14428"  "Expack Corrugated Cartons PLC"                     "Kelaniya"            "Sri Lanka"  "Gold" "2023-03-04"
[16,] ""   " 14453"  "Nanchang New Huarui Clothing Co. Ltd."             "Nanchang City"       "China"      "Gold" "2022-12-02"
[17,] ""   " 14595"  "Talisman Ltd."                                     "Ashulia Savar Dhaka" "Bangladesh" "Gold" "2023-06-06"
[18,] ""   " 14603"  "Walt Technology Group Co., Ltd."                   "Haining"             "China"      "Gold" "2023-09-21"
[19,] ""   " 14649"  "Columbus Apparel (Cambodia) Inc."                  "Phnom Penh"          "Cambodia"   "Gold" "2023-06-02"
[20,] ""   " 14699"  "Lilly Billy (Thailand) Co. Ltd."                   "Bangkok"             "Thailand"   "Gold" "2023-04-11"
[,8]                [,9]
[1,] "Apparel"           ""  
[2,] "Apparel"           ""  
[3,] "Apparel"           ""  
[4,] "Apparel"           ""  
[5,] "Apparel"           ""  
[6,] "Apparel"           ""  
[7,] "Apparel"           ""  
[8,] "Apparel"           ""  
[9,] "Apparel"           ""  
[10,] "Apparel"           ""  
[11,] "Apparel"           ""  
[12,] "Apparel"           ""  
[13,] "Apparel"           ""  
[14,] "Apparel"           ""  
[15,] "Apparel, Footwear" ""  
[16,] "Apparel"           ""  
[17,] "Apparel"           ""  
[18,] "Apparel"           ""  
[19,] "Apparel"           ""  
[20,] "Apparel"           " "

最新更新