r语言 - RShiny处理来自selectInput的数据帧列名



我正在RShiny中创建一个交互式应用程序,该应用程序可以显示分组箱形图以及显示基于用户上传CSV文件时选择的列的样本组的Kruskal-Wallis测试结果的表。

理想情况下,当用户上传他们的CSV时,他们可以选择他们希望使用哪些列作为X和Y变量、类标签和分组框图和统计计算的组标签。

代码需要尽可能通用,以允许不同的输入列。但是,我很难理解使用输入$colName语法访问所选列中的数据所需的语法。

例如:

当我尝试使用以下dplyr语法获得显示为示例组(输入$groupCol)选择的列中每个组的kruskal-wallis结果的表时,输出是不正确的,因为它不知何缘故没有使用正确的列值:

KWtable <- copyCleanedDF  %>% group_by([copyCleanedDF[,input$groupCol]) %>% kruskal_test([copyCleanedDF[,input$numCol] ~ [copyCleanedDF[,input$varCol])

当我通过首先细分每个组并计算每个组的KW来分解代码时,它可以正确工作,但是它是硬编码的,不会在任何组变量列上工作:

# subset team A and test KW
ASubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "A",])
krusyTeamA <- kruskal_test(ASubset[,input$numCol] ~ ASubset[,input$varCol], data = ASubset)
# add in a column showing it is tested in team A samples only
krusyTeamA$groupVariable = "Team A"
# subset team B and test KW
BSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "B",])
krusyTeamB <- kruskal_test(BSubset[,input$numCol] ~ BSubset[,input$varCol], data = BSubset)
# add in a column showing it is tested in team B samples only
krusyTeamB$groupVariable = "Team B"
# subset team C and test KW
CSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "C",])
krusyTeamC <- kruskal_test(CSubset[,input$numCol] ~ CSubset[,input$varCol], data = CSubset)
# add in a column showing it is tested in team C samples only
krusyTeamC$groupVariable = "Team C"
# put the three tables together
KWtable <- rbind(krusyTeamA,krusyTeamB,krusyTeamC)

列名也显得很奇怪,语法显示而不是实际的列名(例如,它显示为cleanData()[,input$varCol)]而不是"Genotype",所以我想更好地了解如何处理这些数据。

我已经包含了完整的Rshiny代码和示例CSV文件,以便能够复制此代码。

library(shiny)
library(datasets)
library(plotly)
library(dplyr)
library(reticulate)
library(DT)
library(ggplot2)
library(tidyverse)
library(rstatix)
library(dplyr)

ui <- shinyUI(fluidPage(
titlePanel("TargetID Median Levels"),
tabsetPanel(
tabPanel("Upload File",
titlePanel("Uploading Files"),
sidebarLayout(
sidebarPanel(
fileInput('file1', 'Browse and select your CSV file',
accept=c('text/csv', 
'text/comma-separated-values,text/plain', 
'.csv')),

# added interface for uploading data from
# http://shiny.rstudio.com/gallery/file-upload.html
tags$br(),
checkboxInput('header', 'Column headers', TRUE),
selectInput('varCol', 'X Variable', ""),
selectInput('numCol', 'Select the Y Variable,...)', "", selected = ""),
selectInput('classCol', 'Select the class label,...)', "", selected = ""),
selectInput('groupCol', 'Select the group label,...)', "", selected = ""),
selectInput("plot.type","Plot Type:",
list(boxplot = "boxplot")#, histogram = "histogram", density = "density")
),

radioButtons('sep', 'Delimiter',
c(Semicolon=';',
Comma=',',
Tab='t'),
','),
radioButtons('quote', 'Quote',
c(None='',
'Double Quote'='"',
'Single Quote'="'"),
'"')

),
mainPanel(

h3("Uploaded data"),

dataTableOutput('table1'),

h3(""),

h3("Boxplot with Median Levels"),

plotlyOutput('MyPlot'),

h3("Kruskal-Wallis H Test"),

dataTableOutput('table2')


)
)
)
)
)
)
server <- shinyServer(function(input, output, session) {
# added "session" because updateSelectInput requires it
options(warn=-1)
# options(encoding="UTF-8")
data <- reactive({ 
req(input$file1) ## ?req #  require that the input is available
# get the input file uploaded to the server side
inFile <- input$file1 
# read the input file as a data frame into R
inputDF <- read.csv(inFile$datapath, header = input$header, sep = input$sep,
quote = input$quote, stringsAsFactors = TRUE)

inputDF[inputDF=="NA"] <- NA # convert missing value strings to NAs recognised by R
# clean up the no-calls and in-phase genotypes from the variant genotypes columns
inputDF[inputDF == "./." | inputDF == ".|."] <- NA # convert the no-calls to missing (NA)
inputDF[inputDF == "0|0"] <- "0/0" # change in-phase wildtype
inputDF[inputDF == "0|1"] <- "0/1" # change in-phase heterzygous
inputDF[inputDF == "1|1"] <- "1/1" # change in-phase homo alt
# Update inputs (you could create an observer with both updateSel...)
# You can also constraint your choices. If you wanted select only numeric
# variables you could set "choices = sapply(df, is.numeric)"
# It depends on what do you want to do later on.
updateSelectInput(session, inputId = 'numCol', label = 'Numerical variable (e.g. LapTime...)',
choices = names(inputDF), selected = names(inputDF)[4])
updateSelectInput(session, inputId = 'varCol', label = 'Sample genotypes for a variant',
choices = names(inputDF), selected = names(inputDF)[1])
updateSelectInput(session, inputId = 'classCol', label = 'Class label (e.g. Sex)',
choices = names(inputDF), selected = names(inputDF)[3])
updateSelectInput(session, inputId = 'groupCol', label = 'Group label (e.g. Team)',
choices = names(inputDF), selected = names(inputDF)[2])
return(inputDF)
})

# display the first output table with the uploaded data
output$table1 <- renderDataTable({
req(input$file1)
datatable(
data(),
filter = "top",
selection = "none", #this is to avoid select rows if you click on the rows
rownames = FALSE,
extensions = 'Buttons',

options = list(
scrollX = TRUE,
autoWidth = TRUE,
dom = 'Blrtip',
buttons =
list(I('colvis'), 'copy', 'print', list(
extend = 'collection',
buttons = list(list(extend = 'csv', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible")),
list(extend = 'excel', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible"))),
text = 'Download'
)),
lengthMenu = list(c(10, 30, 50, -1),
c('10', '30', '50', 'All'))
),
class = "display"
)
})
cleanData <- reactive({
req(input$file1)
# save the selected dataframe and subset to have only the selected columns
copyDF <- data.frame(data())
# remove any rows with nas in the 4 selected columns
cleanedDF <- copyDF %>% drop_na(c(input$varCol, input$numCol, input$classCol, input$groupCol))
return(cleanedDF)
})
kwData <- reactive({
req(input$file1)
copyCleanedDF <- data.frame(cleanData())
# get the freq count for each group to plot N
# using dplyr function to create a frequency table to match the grouped plt
# this will enable the freq counts to be added to the plot
myFreqs <- copyCleanedDF %>% group_by(copyCleanedDF[input$groupCol], copyCleanedDF[input$varCol],copyCleanedDF[input$classCol]) %>% summarize(Freq=n())  
groupVariable <- as.character(input$groupCol)
# subset team A and test KW
ASubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "A",])
krusyTeamA <- kruskal_test(ASubset[,input$numCol] ~ ASubset[,input$varCol], data = ASubset)
# add in a column showing it is tested in team A samples only
krusyTeamA$groupVariable = "Team A"
# subset team B and test KW
BSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "B",])
krusyTeamB <- kruskal_test(BSubset[,input$numCol] ~ BSubset[,input$varCol], data = BSubset)
# add in a column showing it is tested in team B samples only
krusyTeamB$groupVariable = "Team B"
# subset team C and test KW
CSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "C",])
krusyTeamC <- kruskal_test(CSubset[,input$numCol] ~ CSubset[,input$varCol], data = CSubset)
# add in a column showing it is tested in team C samples only
krusyTeamC$groupVariable = "Team C"
# put the three tables together
KWtable <- rbind(krusyTeamA,krusyTeamB,krusyTeamC)
### this part of the code doesn't work 
#KWtable <- copyCleanedDF  %>% group_by(copyCleanedDF[,input$groupCol]) %>% kruskal_test(copyCleanedDF[,input$numCol] ~ copyCleanedDF[,input$varCol])
# then paste the KW p-value to the team label in the main dataframe
# to include it in the plot image
#plotDF <- merge(copyCleanedDF,KWtable,by=("input$groupCol")) # doesn't work 
#plotDF$input$groupCol <- paste0(plotDF$input$groupCol, "n", plotDF$method, " p=", plotDF$p)
return(KWtable)
})
freqData <- reactive({
req(input$file1)
copyCleanedDF <- data.frame(cleanData())
# get the freq count for each group to plot N
# using dplyr function to create a frequency table to match the grouped plt
# this will enable the freq counts to be added to the plot
myFreqs <- copyCleanedDF %>% group_by(copyCleanedDF[input$groupCol],   copyCleanedDF[input$varCol],copyCleanedDF[input$classCol]) %>% summarize(Freq=n())  
return(myFreqs)
})
# display grouped boxplots
output$MyPlot <- renderPlotly({
req(input$file1)
if(input$plot.type == "boxplot"){
pl <-  ggplot(cleanData(), aes(x=cleanData()[,input$varCol], y=cleanData()[,input$numCol], fill=cleanData()[,input$classCol])) +
stat_boxplot(geom ='errorbar') + # add error bars
geom_boxplot()  + 
facet_grid(~cleanData()[,input$groupCol],scale="free")
pl <- pl + stat_summary(geom = 'text', label = paste("n=", freqData()$Freq), fun = max, vjust = -1, position = position_dodge(width=0.7))

# This is to change the y-axis depending on the plot to allow for N to show on the plot
pl <- pl + scale_y_continuous(limits = function(x){
c(min(x), ceiling(max(x) * 1.1))
})

pl %>%
ggplotly() %>%
layout(boxmode = "group", autosize = TRUE, boxgroupgap=0.002, boxgap=0.01)
}
})
# display the second output table
output$table2 <- renderDataTable({
req(input$file1)
datatable(
kwData(),
filter = "top",
selection = "none", #this is to avoid select rows if you click on the rows
rownames = FALSE,
extensions = 'Buttons',

options = list(
scrollX = TRUE,
autoWidth = TRUE,
dom = 'Blrtip',
buttons =
list(I('colvis'), 'copy', 'print', list(
extend = 'collection',
buttons = list(list(extend = 'csv', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible")),
list(extend = 'excel', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible"))),
text = 'Download'
)),
lengthMenu = list(c(10, 30, 50, -1),
c('10', '30', '50', 'All'))
),
class = "display"
)
})

})
shinyApp(ui, server)

有没有人知道发生了什么,我该如何改进我的代码?

提前感谢

CSV文件示例

下面是一个小示例,它将使用像input这样的字符列表来进行分析:

library(rstatix)
library(dplyr)
input <- list(groupCol = 'supp', numCol = 'len', varCol = 'dose')
input
#> $groupCol
#> [1] "supp"
#> 
#> $numCol
#> [1] "len"
#> 
#> $varCol
#> [1] "dose"
ToothGrowth %>% 
group_by(!!sym(input$groupCol)) %>% 
kruskal_test(reformulate(input$varCol, response=input$numCol))
#> # A tibble: 2 × 7
#>   supp  .y.       n statistic    df          p method        
#> * <fct> <chr> <int>     <dbl> <int>      <dbl> <chr>         
#> 1 OJ    len      30      18.5     2 0.0000958  Kruskal-Wallis
#> 2 VC    len      30      25.1     2 0.00000359 Kruskal-Wallis

在2022-05-18由reprex包(v2.0.1)创建

最新更新